class Spidy::Connector::Html
A thin wrapper around a Mechanize agent: it fetches a URL and passes the resulting page to a caller-supplied block.
Attributes
agent[R]
Public Class Methods
new(user_agent:)
click to toggle source
# File lib/spidy/connector/html.rb, line 9
# Builds a connector backed by a new Mechanize agent.
#
# The user agent value is stored in @user_agent in addition to being
# assigned to the agent, so it stays available after construction.
#
# @param user_agent [Object] value assigned to the agent's `user_agent`
#   (presumably a String -- confirm against callers)
def initialize(user_agent:)
  @user_agent = user_agent
  @agent = Mechanize.new
  @agent.user_agent = @user_agent
end
Public Instance Methods
call(url, encoding: nil, retry_count: 5, &yielder)
click to toggle source
# File lib/spidy/connector/html.rb, line 17
# Fetches +url+ and hands the resulting page to the given block.
#
# @param url [Object] target location; must not be blank
# @param encoding [Object, nil] when truthy, becomes the agent's default
#   encoding and the agent is told to force that default
# @param retry_count [Integer] forwarded to #connect unchanged
# @param yielder [Proc] block forwarded to #connect as a plain argument
# @return [Object] whatever #connect returns
# @raise [RuntimeError] when +url+ is blank
def call(url, encoding: nil, retry_count: 5, &yielder)
  raise 'url is not specified' if url.blank?

  if encoding
    # NOTE(review): these settings are only ever switched on here; nothing in
    # this method resets them when a later call passes `encoding: nil`.
    mechanize = agent
    mechanize.default_encoding = encoding
    mechanize.force_default_encoding = true
  end

  connect(url, retry_count, yielder)
end
refresh!()
click to toggle source
# File lib/spidy/connector/html.rb, line 26
# Replaces the current agent with a brand-new Mechanize instance and
# re-applies the user agent value remembered in @user_agent.
#
# @return [Object] the value of @user_agent (result of the final assignment)
def refresh!
  @agent = Mechanize.new
  agent.user_agent = @user_agent
end
Private Instance Methods
connect(url, retry_count, yielder)
click to toggle source
# File lib/spidy/connector/html.rb, line 33
# Performs the GET request and passes the fetched page to +yielder+.
#
# A page whose title is exactly 'Sorry, unable to access page...' is treated
# as a failed fetch and reported as Spidy::Connector::Retry instead of being
# passed to +yielder+.
#
# Every Mechanize::ResponseCodeError is converted to Spidy::Connector::Retry
# carrying the original error and its response code. This includes '429' and
# '502', which were previously handled by separate but identical branches.
#
# @param url [Object] location handed to the agent's `get`
# @param retry_count [Integer] accepted for interface compatibility; it is not
#   read anywhere in this method body
# @param yielder [#call] callable invoked with the fetched page
# @return [Object] the value returned by +yielder+
# @raise [Spidy::Connector::Retry] for the blocked-page title or any
#   Mechanize::ResponseCodeError
def connect(url, retry_count, yielder)
  result = nil
  agent.get(url) do |page|
    if page.title == 'Sorry, unable to access page...'
      raise Spidy::Connector::Retry, object: page, response_code: page.try(:response_code)
    end

    result = yielder.call(page)
  end
  result
rescue Mechanize::ResponseCodeError => e
  raise Spidy::Connector::Retry, error: e, response_code: e.response_code
end