class Wayfarer::Crawl

Attributes

dispatcher[R]

The dispatcher built for the prepared job. @!attribute [r] dispatcher

job[R]

The prepared job. @!attribute [r] job

Public Class Methods

new(job, *uris) click to toggle source
# File lib/wayfarer/crawl.rb, line 21
# Sets up a crawl for the given job and initial URIs.
#
# Stores the result of +job.prepare+ as the job, keeps the URIs, and builds a
# Dispatcher and a Processor around that prepared job.
#
# @param job [Object] must respond to +prepare+; its return value is what gets
#   stored as the job (presumably a job class -- TODO confirm)
# @param uris [Array] initial URIs; they are staged into the frontier and
#   handed to the processor's +run+ by +execute+
def initialize(job, *uris)
  @job = job.prepare
  # Must be set before #frontier is first called: building the frontier
  # stages @uris into it.
  @uris = uris
  @dispatcher = Dispatcher.new(@job)
  @processor = Processor.new(@job, frontier, @dispatcher)
end

Public Instance Methods

execute() click to toggle source
# File lib/wayfarer/crawl.rb, line 28
# Runs the crawl: installs the SIGINT handler, runs the job's +:before_crawl+
# hook, hands the initial URIs to the processor, then runs the +:after_crawl+
# hook.
#
# The +ensure+ clause calls +untrap_signals+ whether or not the body raises.
# Note that +:after_crawl+ is skipped when an earlier step raises.
def execute
  trap_signals

  # The instance is not kept here; presumably the observer attaches itself to
  # the processor/dispatcher in its constructor -- TODO confirm.
  CrawlObserver.new(@processor, @dispatcher, config.logger)

  @job.run_hook(:before_crawl)
  @processor.run(*@uris)
  @job.run_hook(:after_crawl)
ensure
  untrap_signals
end
frontier() click to toggle source

The memoized frontier, pre-staged with the crawl's initial URIs. @return [Frontier]

# File lib/wayfarer/crawl.rb, line 42
# Lazily builds and memoizes the crawl's frontier.
#
# The backend class is selected by +config.frontier+; any value other than the
# four symbols listed below selects +Frontiers::MemoryFrontier+. On the first
# call the new frontier is extended with +Frontiers::NormalizeURIs+ when
# +config.normalize_uris+ is truthy, and the initial URIs are staged into it.
# Later calls return the same object without staging again.
#
# @return [Frontier]
def frontier
  return @frontier if @frontier

  # Only the matching branch resolves its constant, so unused backends are
  # never referenced.
  backend = case config.frontier
            when :memory_trie  then Frontiers::MemoryTrieFrontier
            when :redis        then Frontiers::RedisFrontier
            when :memory_bloom then Frontiers::MemoryBloomfilter
            when :redis_bloom  then Frontiers::RedisBloomfilter
            else                    Frontiers::MemoryFrontier
            end

  @frontier = backend.new(config)
  @frontier.extend(Frontiers::NormalizeURIs) if config.normalize_uris
  @frontier.stage(*@uris) # TODO: Test
  @frontier
end

Private Instance Methods

trap_signals() click to toggle source
# File lib/wayfarer/crawl.rb, line 67
# Installs a SIGINT handler for the duration of the crawl and remembers the
# handler it replaces in +@cached_sigint_handler+.
#
# On SIGINT the handler calls +halt!+, then invokes the previously installed
# handler via +try(:call)+, then exits with status -1.
# NOTE(review): +halt!+ is not defined in the visible part of this class and
# +try+ is not core Ruby (presumably ActiveSupport) -- confirm both are in scope.
def trap_signals
  on_interrupt = proc do
    halt!
    @cached_sigint_handler.try(:call)
    exit(-1)
  end

  # trap returns whatever handler was installed before this call.
  @cached_sigint_handler = trap(:INT, on_interrupt)
end
untrap_signals() click to toggle source
# File lib/wayfarer/crawl.rb, line 75
# Restores the SIGINT handler that was active before +trap_signals+ ran.
#
# The value cached in +@cached_sigint_handler+ is handed straight back to
# +trap+, so both a previous Proc handler and the string commands that +trap+
# reports (e.g. "DEFAULT", "IGNORE") are reinstated as they were. Wrapping the
# cached value in a new block instead would leave a do-nothing handler
# installed whenever the previous handler was a string command, so Ctrl-C
# would be swallowed after the crawl.
#
# Falls back to "DEFAULT" when nothing has been cached, which can happen
# because +execute+ calls this from an +ensure+ clause.
#
# @return [Proc, String, nil] the handler that was replaced, as returned by +trap+
def untrap_signals
  trap(:INT, @cached_sigint_handler || "DEFAULT")
end