class Aranha::Processor

Constants

DEFAULT_MAX_TRIES

Attributes

manager[R]

Public Class Methods

new(manager = nil)
# File lib/aranha/processor.rb, line 14
# Sets up the processor state and immediately runs the whole processing loop.
#
# @param manager [Object, nil] manager to use; when nil (or false) the
#   processor falls back to +::Aranha::Manager.default+.
# @raise [RuntimeError] once the loop has finished, if any address is still
#   recorded as failed.
def initialize(manager = nil)
  @try = 0
  @failed = {} # address id => number of failed attempts
  @manager = manager || ::Aranha::Manager.default
  # Explicit receiver on purpose: the bare name +manager+ is the (possibly
  # nil) parameter here, not the attribute reader.
  self.manager.init
  process_loop
  return if @failed.empty?

  raise "Addresses failed: #{@failed.count}"
end

Private Instance Methods

max_tries()
# File lib/aranha/processor.rb, line 80
# Maximum number of passes, read once from the ARANHA_MAX_TRIES environment
# variable and memoized in @max_tries.
#
# Zero or negative values are normalized to 0. When the variable is unset or
# is not a valid integer, DEFAULT_MAX_TRIES is used instead.
#
# @return [Integer]
def max_tries
  return @max_tries if @max_tries

  @max_tries =
    begin
      # Integer() raises TypeError for nil (unset variable) and ArgumentError
      # for non-numeric text; both fall through to the default below.
      [Integer(ENV['ARANHA_MAX_TRIES']), 0].max
    rescue ArgumentError, TypeError
      DEFAULT_MAX_TRIES
    end
end
max_tries_s()
# File lib/aranha/processor.rb, line 76
# Display form of the try limit, used in log messages.
#
# @return [Integer, String] the limit itself when it is positive, otherwise
#   the string 'INF'.
def max_tries_s
  limit = max_tries
  limit.positive? ? limit : 'INF'
end
next_address()
# File lib/aranha/processor.rb, line 64
# Picks the next address to work on: the first unprocessed record whose id is
# not in the list returned by #not_try_ids.
#
# @return [Object, nil] whatever +first+ yields on the filtered relation
#   (nil when nothing is left for this pass).
def next_address
  candidates = unprocessed.where.not(id: not_try_ids)
  candidates.first
end
not_try_ids()
# File lib/aranha/processor.rb, line 72
# Ids of addresses that must be skipped in the current pass: those whose
# failure count in @failed is greater than the current @try number.
#
# @return [Array] ids, in @failed insertion order.
def not_try_ids
  @failed.each_with_object([]) do |(address_id, failures), skipped|
    skipped << address_id if failures > @try
  end
end
process_address(address)
# File lib/aranha/processor.rb, line 45
# Processes a single address and updates the failure bookkeeping.
#
# Logs a progress line, runs an ::Aranha::AddressProcessor on the address,
# then either clears the address from @failed (success) or hands the
# processor to #process_exception (failure).
#
# The total shown in the progress line is read from the manager this
# processor was built with, so an injected manager is reported consistently
# with the one used for logging.
# NOTE(review): assumes every injected manager responds to +addresses_count+
# like ::Aranha::Manager.default does - confirm against callers.
#
# @param address [Object] the address record to process.
# @return [Object] the result of Hash#delete on success, otherwise the
#   result of #process_exception.
def process_address(address)
  progress = "Processing #{address} (Try: #{@try}/#{max_tries_s}," \
             " Unprocessed: #{unprocessed.count}/#{manager.addresses_count})"
  manager.log_info(progress)

  processor = ::Aranha::AddressProcessor.new(address)
  return process_exception(processor) unless processor.successful?

  @failed.delete(processor.address.id)
end
process_exception(address_processor)
# File lib/aranha/processor.rb, line 56
# Handles a failed address processor.
#
# A non-rescuable error is re-raised as is. A rescuable one bumps the failure
# counter for the address in @failed (starting from zero) and is logged as a
# warning through the manager.
#
# @param address_processor [Object] processor exposing +error+,
#   +rescuable_error?+ and +address.id+.
# @return [Object] the result of +manager.log_warn+.
# @raise [Exception] the processor's error when it is not rescuable.
def process_exception(address_processor)
  if address_processor.rescuable_error?
    address_id = address_processor.address.id
    @failed[address_id] = (@failed[address_id] || 0) + 1
    manager.log_warn(address_processor.error)
  else
    raise address_processor.error
  end
end
process_loop()
# File lib/aranha/processor.rb, line 25
# Logs the configured try limit, then keeps calling #process_next_address
# until it returns a truthy value.
#
# @return [nil]
def process_loop
  banner = "Max tries: #{max_tries_s}"
  manager.log_info(banner)
  loop { break if process_next_address }
end
process_next_address()
# File lib/aranha/processor.rb, line 32
# Runs one step of the processing loop.
#
# - If #next_address yields an address, it is processed and the loop goes on.
# - If nothing is left but failures are recorded, a new pass starts (@try is
#   incremented); the loop stops only when a positive #max_tries has been
#   reached.
# - If nothing is left and nothing failed, the loop stops.
#
# @return [Boolean] true when the caller should stop looping.
def process_next_address
  address = next_address
  if address
    process_address(address)
    return false
  end

  return true if @failed.empty?

  @try += 1
  # A non-positive max_tries never stops the loop here.
  max_tries.positive? && @try >= max_tries
end
unprocessed()
# File lib/aranha/processor.rb, line 68
# Addresses that still have to be processed, as reported by this processor's
# manager.
#
# Reads from #manager rather than ::Aranha::Manager.default, so a manager
# injected through the constructor is the one actually queried. When no
# manager is injected, the constructor sets #manager to the default one, so
# behavior is unchanged in that case.
# NOTE(review): assumes every injected manager responds to
# +unprocessed_addresses+ like the default one - confirm against callers.
#
# @return [Object] whatever +manager.unprocessed_addresses+ returns.
def unprocessed
  manager.unprocessed_addresses
end