class Husc

Constants

VERSION

Attributes

code[R]
html[R]
params[R]
tables[R]
url[R]

Public Class Methods

new(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10) click to toggle source
# File lib/husc.rb, line 38
# Builds a scraper from one of three sources, checked in this order:
# a URL to fetch, an already-parsed node (+doc+), or a raw HTML string.
#
# url             - passed to #get when it is not nil.
# doc             - node object; +to_html+ is called on it and it becomes @doc.
# html            - markup handed to update_params when url and doc are both nil.
# user_agent      - assigned to the Mechanize agent when not nil.
# request_headers - assigned to the Mechanize agent when not nil.
# timeout         - assigned to the agent's read_timeout (defaults to 10).
def initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10)
  ## -----*----- Constructor -----*----- ##
  @agent = Mechanize.new.tap do |agent|
    agent.keep_alive = false
    agent.user_agent = user_agent unless user_agent.nil?
    agent.request_headers = request_headers unless request_headers.nil?
    agent.read_timeout = timeout
  end

  if url.nil? && doc.nil?
    # update_params stores the markup in @html itself, so no extra assignment is needed.
    update_params(html)
  elsif url.nil?
    @html = doc.to_html
    @doc = doc
    table_to_hash
  else
    get(url)
  end

  # Queue of pending form inputs, filled by #send and consumed by #submit.
  @params = []
end

Public Instance Methods

attr(name) click to toggle source
# File lib/husc.rb, line 197
# Reads an attribute from the wrapped node.
#
# name - attribute name forwarded to @doc.attr.
#
# Returns whatever @doc.attr returns, or an empty string when that is nil.
def attr(name)
  ## -----*----- Fetch a node attribute -----*----- ##
  value = @doc.attr(name)
  value.nil? ? '' : value
end
css(locator, single = false) click to toggle source
# File lib/husc.rb, line 154
# Selects nodes under @doc with a CSS selector and wraps each one in a Husc.
#
# locator - CSS selector string passed to @doc.css.
# single  - when truthy, return only the first match.
#
# Returns a CrawlArray of Husc objects. With +single+, returns the first Husc,
# or an empty CrawlArray when nothing matched.
def css(locator, single = false)
  ## -----*----- Select elements by CSS selector -----*----- ##
  wrapped = @doc.css(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)

  # Multiple-node mode: hand back the whole collection.
  return matches unless single

  # Single-node mode: first match, or an empty collection as the "nothing found" value.
  head = matches[0]
  head.nil? ? CrawlArray.new : head
end
get(url) click to toggle source
# File lib/husc.rb, line 60
# Navigates the agent to +url+ and refreshes the parsed state from the response.
#
# url - address to fetch; stored in @url before the request is made.
#
# On an HTTP error status (Mechanize::ResponseCodeError) the status code is
# stored in @code and the error page's content is still parsed.
# On a connection-level failure (Net::HTTP::Persistent::Error) the error is
# printed, @code is cleared and nil is returned without touching the parsed
# document.
#
# Returns the result of update_params, or nil when no page could be obtained.
def get(url)
  ## -----*----- Page transition -----*----- ##
  @url = url
  begin
    page = @agent.get(@url)
    @code = page.code
  rescue Mechanize::ResponseCodeError => e
    # The exception carries both the status code and the page that was
    # returned; keep the code (not the body) and parse the error page.
    @code = e.response_code
    page = e.page
  rescue Net::HTTP::Persistent::Error => e
    puts e
    # No response exists, so there is nothing to parse.
    @code = nil
    return
  end
  return if page.nil?

  update_params(page.content.toutf8)
end
inner_html(shaping = true) click to toggle source
# File lib/husc.rb, line 179
# Returns the inner HTML of the wrapped node.
#
# shaping - when truthy (the default) the markup is passed through
#           shaping_string before being returned; otherwise it is returned raw.
def inner_html(shaping = true)
  ## -----*----- HTML inside the tag -----*----- ##
  raw = @doc.inner_html
  shaping ? shaping_string(raw) : raw
end
inner_text(shaping = true) click to toggle source
# File lib/husc.rb, line 170
# Returns the inner text of the wrapped node.
#
# shaping - when truthy (the default) the text is passed through
#           shaping_string before being returned; otherwise it is returned raw.
def inner_text(shaping = true)
  ## -----*----- Text inside the tag -----*----- ##
  raw = @doc.inner_text
  shaping ? shaping_string(raw) : raw
end
send(opts) click to toggle source
# File lib/husc.rb, line 75
# Queues one form input to be applied by the next #submit.
#
# opts - key/value pairs; keys are converted to symbols. The action key selects
#        what #submit does with the entry, the remaining keys locate the element:
#          value (String) - text, numbers and similar fields
#          check (Bool)   - checkboxes
#          file  (String) - file uploads
#          click (Bool)   - button to click
#
# NOTE(review): this method shares its name with Object#send, so dynamic
# dispatch on Husc instances has to use __send__ / public_send.
#
# Returns the symbol-keyed copy of +opts+.
def send(opts)
  ## -----*----- Specify form data -----*----- ##
  entry = {}
  @params.push(entry)
  normalized = opts.each_with_object({}) { |(key, val), acc| acc[key.to_sym] = val }
  entry.merge!(normalized)
  normalized
end
submit(opts) click to toggle source
# File lib/husc.rb, line 87
# Applies every input queued by #send to a form on the current page, submits
# it, and refreshes the parsed state from the response.
#
# opts - either an Integer index into the page's forms, or a set of criteria
#        (keys are converted to symbols) passed to the page's form lookup.
#
# Returns nil without submitting when there is no current page or no matching
# form (the queued inputs are kept in that case). Otherwise the queue is
# emptied and an empty Array is returned.
def submit(opts)
  ## -----*----- Form submission -----*----- ##
  page = @agent.page
  return if page.nil?

  # Select the form. The Integer check has to come first: an Integer has no
  # #map, so normalising the keys up front would raise before reaching it.
  form =
    if opts.kind_of?(Integer)
      page.forms[opts]
    else
      page.form(**opts.map { |k, v| [k.to_sym, v] }.to_h)
    end
  return if form.nil?
  button = nil

  @params.each do |param|
    # Each action key is removed from the entry so that what remains can be
    # used as the element lookup criteria.

    # Text, numbers and similar fields
    if param.include?(:value) && !param.include?(:check)
      value = param.delete(:value)
      next if value.nil?
      field = form.field_with(**param)
      field.value = value unless field.nil?
    end

    # Checkboxes
    if param.include?(:check)
      check = param.delete(:check)
      next if check.nil?
      checkbox = form.checkbox_with(**param)
      unless checkbox.nil?
        check ? checkbox.check : checkbox.uncheck
      end
    end

    # File uploads
    if param.include?(:file)
      file = param.delete(:file)
      next if file.nil? || !File.exist?(file)
      upload = form.file_upload_with(**param)
      upload.file_name = file unless upload.nil?
    end

    # Button click
    if param.include?(:click)
      click = param.delete(:click)
      next unless click
      found = form.button_with(**param)
      button = found unless found.nil?
    end
  end

  result = @agent.submit(form, button)
  update_params(result.content.toutf8)
  @params = []
end
text(shaping = true) click to toggle source
# File lib/husc.rb, line 188
# Returns the text of the wrapped node as reported by @doc.text.
#
# shaping - when truthy (the default) the text is passed through
#           shaping_string before being returned; otherwise it is returned raw.
def text(shaping = true)
  ## -----*----- Text inside the tag (other tags stripped) -----*----- ##
  raw = @doc.text
  shaping ? shaping_string(raw) : raw
end
xpath(locator, single = false) click to toggle source
# File lib/husc.rb, line 138
# Selects nodes under @doc with an XPath expression and wraps each one in a Husc.
#
# locator - XPath expression passed to @doc.xpath.
# single  - when truthy, return only the first match.
#
# Returns a CrawlArray of Husc objects. With +single+, returns the first Husc,
# or an empty CrawlArray when nothing matched.
def xpath(locator, single = false)
  ## -----*----- Select elements by XPath -----*----- ##
  found = CrawlArray.new(@doc.xpath(locator).map { |node| Husc.new(doc: node) })

  if single
    # Single-node mode: an empty collection stands in for "nothing found".
    first_hit = found[0]
    return CrawlArray.new if first_hit.nil?

    return first_hit
  end

  # Multiple-node mode
  found
end

Private Instance Methods

shaping_string(str) click to toggle source
# File lib/husc.rb, line 234
# Collapses redundant whitespace in +str+.
#
# str - any object; converted with to_s (nil becomes "").
#       The text is expected to be UTF-8 or plain ASCII.
#
# Steps, applied in this order (later steps rely on earlier ones):
#   1. no-break (U+00A0) and ideographic (U+3000) spaces become ASCII spaces
#   2. runs of spaces collapse to one space
#   3. a line holding a single space is dropped, and a single leading space
#      after a newline is removed
#   4. carriage returns become newlines, then runs of newlines collapse to one
#   5. tabs are removed and the result is stripped at both ends
#
# Returns the normalized String.
def shaping_string(str)
  ## -----*----- String normalization -----*----- ##
  # Step 1 used to replace an ASCII space with itself, which did nothing;
  # non-ASCII spaces therefore survived squeeze(' ') and strip.
  str.to_s
     .gsub(/[\u00A0\u3000]/, ' ')
     .squeeze(' ')
     .gsub("\n \n", "\n")
     .gsub("\n ", "\n")
     .gsub("\r", "\n")
     .squeeze("\n")
     .gsub("\t", '')
     .strip
end
table_to_hash() click to toggle source
# File lib/husc.rb, line 223
# Rebuilds @tables from the label/value pairs found under @doc.
#
# Every +tr+ contributes its +th+ text as key and its +td+ text as value;
# every +dl+ contributes its +dt+ text as key and its +dd+ text as value.
# Keys have newlines and spaces removed; values go through shaping_string.
# A later entry with the same key overwrites an earlier one.
def table_to_hash
  ## -----*----- Convert table contents to a Hash -----*----- ##
  @tables = {}

  store_pair = lambda do |container, label_tag, value_tag|
    label = container.css(label_tag).inner_text.delete("\n ")
    @tables[label] = shaping_string(container.css(value_tag).inner_text)
  end

  @doc.css('tr').each { |row| store_pair.call(row, 'th', 'td') }
  @doc.css('dl').each { |list| store_pair.call(list, 'dt', 'dd') }
end
update_params(html) click to toggle source
# File lib/husc.rb, line 211
# Refreshes @url, @html, @doc and @tables from a new chunk of markup.
#
# html - markup to store in @html and parse into @doc.
#
# @url becomes the current page's URI as a String, or '' when the agent has
# no current page (for example when the object was built from raw HTML).
#
# Returns the result of table_to_hash.
def update_params(html)
  ## -----*----- Refresh parameters -----*----- ##
  # The URI lives on the agent's current page, so the capability check has to
  # target the page; checking the agent itself never matched and reset @url
  # to '' after every request.
  page = @agent.page
  @url = page.respond_to?(:uri) ? page.uri.to_s : ''
  @html = html
  @doc = Nokogiri::HTML.parse(@html)
  table_to_hash
end