class Husc
Constants
- VERSION
Attributes
code[R]
html[R]
params[R]
tables[R]
url[R]
Public Class Methods
new(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10)
click to toggle source
# File lib/husc.rb, line 38
# Constructor: configures a fresh Mechanize agent and loads the initial content.
#
# The content source is chosen in this order: +url+, then +doc+, then +html+.
#
# @param url [String, nil] when non-nil, fetched immediately through #get
# @param doc [Object, nil] already-parsed node; must respond to +to_html+
#   (presumably a Nokogiri node -- confirm against callers)
# @param html [String, nil] raw markup handed to #update_params when neither
#   +url+ nor +doc+ is given
# @param user_agent [String, nil] assigned to the agent only when non-nil
# @param request_headers [Hash, nil] assigned to the agent only when non-nil
# @param timeout [Numeric] assigned to the agent's +read_timeout+
def initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10)
  agent = Mechanize.new
  agent.keep_alive = false
  agent.user_agent = user_agent unless user_agent.nil?
  agent.request_headers = request_headers unless request_headers.nil?
  agent.read_timeout = timeout
  @agent = agent

  case
  when !url.nil?
    get(url)
  when !doc.nil?
    @html = doc.to_html
    @doc = doc
    table_to_hash
  else
    update_params(html)
    @html = html
  end

  # Pending form inputs queued by #send and consumed by #submit.
  @params = []
end
Public Instance Methods
attr(name)
click to toggle source
# File lib/husc.rb, line 197
# Reads an attribute from the wrapped node.
#
# @param name [String, Symbol] attribute name passed straight to +@doc.attr+
# @return [Object] the attribute value, or an empty string when +@doc.attr+
#   returns nil
def attr(name)
  value = @doc.attr(name)
  value.nil? ? '' : value
end
css(locator, single = false)
click to toggle source
# File lib/husc.rb, line 154
# Selects elements from the current document with a CSS selector.
#
# Every match is wrapped in its own Husc instance and collected in a CrawlArray.
#
# @param locator [String] CSS selector handed to +@doc.css+
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all matches; or, in single mode, the first match
#   (an empty CrawlArray when there is none)
def css(locator, single = false)
  wrapped = @doc.css(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)

  # Multi-node mode: hand back the whole collection.
  return matches unless single

  # Single-node mode: first hit, or an empty collection as the "nothing found" value.
  head = matches[0]
  head == nil ? CrawlArray.new() : head
end
get(url)
click to toggle source
# File lib/husc.rb, line 60
# Navigates the agent to +url+ and refreshes the parsed state from the response.
#
# HTTP error responses (Mechanize::ResponseCodeError) are not fatal: the status
# code is recorded in +@code+ and the error page itself is parsed, so callers
# can still inspect what the server sent back.
#
# @param url [String] address to fetch; also stored in +@url+ before the request
# @return [Object, nil] whatever #update_params returns, or nil when the
#   request failed at the transport level and no page is available
def get(url)
  @url = url

  begin
    page = @agent.get(@url)
    @code = page.code
  rescue Mechanize::ResponseCodeError => e
    # Keep the status code (not the body) and fall through with the error page;
    # otherwise +page+ would still be nil below.
    @code = e.response_code
    page = e.page
  rescue Net::HTTP::Persistent::Error => e
    # Connection-level failure: there is no response to parse.
    puts e
    return
  end

  return if page.nil?

  update_params(page.content.toutf8)
end
inner_html(shaping = true)
click to toggle source
# File lib/husc.rb, line 179
# Returns the HTML contained inside the wrapped node.
#
# @param shaping [Boolean] when truthy, the markup is passed through
#   #shaping_string before being returned
# @return [String] inner HTML, cleaned up or raw depending on +shaping+
def inner_html(shaping = true)
  markup = @doc.inner_html
  shaping ? shaping_string(markup) : markup
end
inner_text(shaping = true)
click to toggle source
# File lib/husc.rb, line 170
# Returns the text contained inside the wrapped node.
#
# @param shaping [Boolean] when truthy, the text is passed through
#   #shaping_string before being returned
# @return [String] inner text, cleaned up or raw depending on +shaping+
def inner_text(shaping = true)
  content = @doc.inner_text
  shaping ? shaping_string(content) : content
end
send(opts)
click to toggle source
# File lib/husc.rb, line 75
# Queues one form input to be applied by the next #submit.
#
# Besides the criteria that identify the control, +opts+ carries one action key:
#   value: (String) -- text, numbers and similar fields
#   check: (Bool)   -- checkbox state
#   file:  (String) -- path for a file upload
#   click: (Bool)   -- button to press
#
# NOTE: this method shadows Object#send on Husc instances; use +__send__+ for
# dynamic dispatch on them.
#
# @param opts [Hash] control criteria plus an action key; keys may be strings
#   or symbols and are stored as symbols
# @return [Hash] a symbol-keyed copy of +opts+, separate from the queued entry
def send(opts)
  # Build the entry before touching @params so that a bad +opts+ (e.g. nil)
  # cannot leave a stray empty hash in the queue.
  entry = opts.map { |key, value| [key.to_sym, value] }.to_h
  @params << entry
  entry.dup
end
submit(opts)
click to toggle source
# File lib/husc.rb, line 87
# Applies the inputs queued by #send to a form on the current page and submits it.
#
# @param opts [Integer, Hash] either the index of the form in +page.forms+, or
#   criteria (string or symbol keys) passed to +page.form+
# @return [Array, nil] the emptied parameter queue after a submission, or nil
#   when there is no current page or no matching form (queue left untouched)
def submit(opts)
  page = @agent.page
  return if page.nil?

  # Select the form. The Integer check must come first: an Integer has no
  # #map, so symbolising the keys up front would raise before this branch.
  form =
    if opts.is_a?(Integer)
      page.forms[opts]
    else
      page.form(**opts.map { |k, v| [k.to_sym, v] }.to_h)
    end
  return if form.nil?

  button = nil
  @params.each do |param|
    # Each action key is removed from +param+ so that what remains is the
    # lookup criteria for the matching control.

    # Text, numbers and similar fields
    if param.include?(:value) && !param.include?(:check)
      value = param.delete(:value)
      next if value.nil?

      field = form.field_with(**param)
      field.value = value unless field.nil?
    end

    # Checkboxes
    if param.include?(:check)
      check = param.delete(:check)
      next if check.nil?

      checkbox = form.checkbox_with(**param)
      unless checkbox.nil?
        check ? checkbox.check : checkbox.uncheck
      end
    end

    # File uploads (skipped when the file does not exist)
    if param.include?(:file)
      file = param.delete(:file)
      next if file.nil? || !File.exist?(file)

      upload = form.file_upload_with(**param)
      upload.file_name = file unless upload.nil?
    end

    # Button click: the last matching button is the one used for submission
    if param.include?(:click)
      click = param.delete(:click)
      next unless click

      candidate = form.button_with(**param)
      button = candidate unless candidate.nil?
    end
  end

  result = @agent.submit(form, button)
  update_params(result.content.toutf8)
  @params = []
end
text(shaping = true)
click to toggle source
# File lib/husc.rb, line 188
# Returns the text of the wrapped node with nested tags stripped.
#
# @param shaping [Boolean] when truthy, the text is passed through
#   #shaping_string before being returned
# @return [String] node text, cleaned up or raw depending on +shaping+
def text(shaping = true)
  content = @doc.text
  shaping ? shaping_string(content) : content
end
xpath(locator, single = false)
click to toggle source
# File lib/husc.rb, line 138
# Selects elements from the current document with an XPath expression.
#
# Every match is wrapped in its own Husc instance and collected in a CrawlArray.
#
# @param locator [String] XPath expression handed to +@doc.xpath+
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all matches; or, in single mode, the first match
#   (an empty CrawlArray when there is none)
def xpath(locator, single = false)
  wrapped = @doc.xpath(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)

  # Multi-node mode: hand back the whole collection.
  return matches unless single

  # Single-node mode: first hit, or an empty collection as the "nothing found" value.
  head = matches[0]
  head == nil ? CrawlArray.new() : head
end
Private Instance Methods
shaping_string(str)
click to toggle source
# File lib/husc.rb, line 234
# Tidies a string by removing superfluous whitespace and line breaks.
#
# Steps, in order: no-break / ideographic spaces become plain spaces, runs of
# spaces collapse to one, space-only lines and spaces after a newline are
# dropped, carriage returns become newlines, runs of newlines collapse to one,
# tabs are deleted, and the result is stripped.
#
# @param str [Object] anything; converted with +to_s+ (nil becomes "")
# @return [String] the cleaned-up text
def shaping_string(str)
  text = str.to_s

  # Scraped pages often use U+00A0 / U+3000 instead of an ASCII space; fold
  # them in so the squeeze below sees them. Only attempted on valid UTF-8,
  # because the pattern cannot be matched against other multibyte encodings.
  if text.encoding == Encoding::UTF_8 && text.valid_encoding?
    text = text.gsub(/[ \u00A0\u3000]/, ' ')
  end

  text
    .squeeze(' ')
    .gsub("\n \n", "\n")
    .gsub("\n ", "\n")
    .gsub("\r", "\n")
    .squeeze("\n")
    .gsub("\t", "")
    .strip
end
table_to_hash()
click to toggle source
# File lib/husc.rb, line 223
# Rebuilds +@tables+ from the table-like markup in the current document.
#
# Each +tr+ contributes its +th+ text as key and its +td+ text as value; each
# +dl+ does the same with +dt+ / +dd+. Keys have newlines and spaces removed,
# values go through #shaping_string. Rows are processed before definition
# lists, so a +dl+ entry overwrites a +tr+ entry with the same key.
#
# @return [Object] the result of iterating the +dl+ node set
def table_to_hash
  @tables = {}

  harvest = lambda do |row_selector, key_selector, value_selector|
    @doc.css(row_selector).each do |row|
      key = row.css(key_selector).inner_text.gsub("\n", "").gsub(" ", "")
      @tables[key] = shaping_string(row.css(value_selector).inner_text)
    end
  end

  harvest.call('tr', 'th', 'td')
  harvest.call('dl', 'dt', 'dd')
end
update_params(html)
click to toggle source
# File lib/husc.rb, line 211
# Refreshes the cached state (+@url+, +@html+, +@doc+, +@tables+) from new markup.
#
# @param html [String, nil] markup to parse with Nokogiri::HTML.parse
# @return [Object] whatever #table_to_hash returns
def update_params(html)
  # The URI lives on the agent's current page, not on the agent, so probe the
  # page object. It is nil until something has been fetched, in which case the
  # URL falls back to an empty string.
  page = @agent.page
  @url = page.respond_to?(:uri) ? page.uri : ''

  @html = html
  @doc = Nokogiri::HTML.parse(@html)
  table_to_hash
end