class Spidy::Connector::Html

A wrapper around a Mechanize agent: it fetches a URL and passes the resulting page to a caller-supplied block.

Attributes

agent[R]

Public Class Methods

new(user_agent:)
# File lib/spidy/connector/html.rb, line 9
# Builds the connector around a new Mechanize agent.
#
# @param user_agent value assigned to the agent's user_agent; it is also
#   kept in @user_agent, which refresh! reads when it rebuilds the agent.
def initialize(user_agent:)
  @user_agent = user_agent
  @agent = Mechanize.new.tap { |mechanize| mechanize.user_agent = user_agent }
end

Public Instance Methods

call(url, encoding: nil, retry_count: 5, &yielder)
# File lib/spidy/connector/html.rb, line 17
# Fetches +url+ and passes the resulting page to the given block.
#
# @param url address to fetch; must not be blank
# @param encoding when truthy, it is set as the agent's default encoding and
#   the agent is told to force it. The setting stays on the agent afterwards;
#   this method does not reset it when a later call omits +encoding+.
# @param retry_count forwarded unchanged to connect
# @param yielder block forwarded to connect as a callable
# @return whatever connect returns
# @raise [RuntimeError] when +url+ is blank
def call(url, encoding: nil, retry_count: 5, &yielder)
  raise 'url is not specified' if url.blank?

  if encoding
    mechanize = agent
    mechanize.default_encoding = encoding
    mechanize.force_default_encoding = true
  end

  connect(url, retry_count, yielder)
end
refresh!()
# File lib/spidy/connector/html.rb, line 26
# Replaces the current agent with a new Mechanize instance and assigns the
# stored @user_agent to it.
#
# @return the stored @user_agent (the value of the final assignment)
def refresh!
  (@agent = Mechanize.new).user_agent = @user_agent
end

Private Instance Methods

connect(url, retry_count, yielder)
# File lib/spidy/connector/html.rb, line 33
# Fetches +url+ through the agent and hands the fetched page to +yielder+.
#
# @param url address passed to agent.get
# @param retry_count accepted so the signature matches what call passes;
#   it is not used inside this method
# @param yielder callable invoked with the fetched page
# @return the value returned by yielder.call(page)
# @raise [Spidy::Connector::Retry] when the page title is exactly
#   'Sorry, unable to access page...', or when Mechanize raises a
#   Mechanize::ResponseCodeError (for any response code)
def connect(url, retry_count, yielder)
  result = nil
  agent.get(url) do |page|
    if page.title == 'Sorry, unable to access page...'
      fail Spidy::Connector::Retry, object: page, response_code: page.try(:response_code)
    end

    result = yielder.call(page)
  end
  result
rescue Mechanize::ResponseCodeError => e
  # Every response code is turned into the same Retry error, so no
  # per-code branching is needed here.
  raise Spidy::Connector::Retry, error: e, response_code: e.response_code
end