class Wraith::Spider

Constants

EXT

Attributes

wraith[R]

Public Class Methods

new(config)
# File lib/wraith/spider.rb, line 18
# Sets up a spider for the given configuration.
#
# Builds the underlying Wraith::Wraith object with +imports_must_resolve+
# turned off and starts with an empty collection of discovered paths.
# NOTE(review): presumably resolution is disabled because the imports file
# is the one this spider generates -- confirm against Wraith::Wraith.
#
# config - configuration argument handed straight to Wraith::Wraith.new.
def initialize(config)
  wraith_options = { imports_must_resolve: false }
  @wraith = Wraith::Wraith.new(config, wraith_options)
  @paths = {}
end

Public Instance Methods

add_path(path)
# File lib/wraith/spider.rb, line 39
# Records a crawled URL path under a label derived from the path itself.
#
# The root path "/" is stored under "home". Any other path is labelled by
# replacing every "/" with "__", dropping one trailing "__" (left behind by
# a trailing slash) and downcasing the result. The stored value is the
# downcased path.
#
# path - String URL path, e.g. "/About/Us/".
#
# Returns the downcased path that was stored.
def add_path(path)
  label =
    if path == "/"
      "home"
    else
      path.gsub("/", "__").chomp("__").downcase
    end

  @paths[label] = path.downcase
end
crawl()
# File lib/wraith/spider.rb, line 23
# Crawls the configured base domain with Anemone, registering the path of
# every page visited via #add_path, then writes the collected paths out
# with #write_file.
def crawl
  logger.info "Crawling #{wraith.base_domain}"

  Anemone.crawl(wraith.base_domain) do |crawler|
    # Skip links that end in a dot followed by any of the EXT entries.
    extension_pattern = /\.(#{EXT.join('|')})$/
    crawler.skip_links_like(extension_pattern)

    # Add user specified skips
    crawler.skip_links_like(wraith.spider_skips)

    crawler.on_every_page do |visited|
      url_path = visited.url.path
      logger.info "    #{url_path}"
      add_path(url_path)
    end
  end

  logger.info "Crawl complete."
  write_file
end
write_file()
# File lib/wraith/spider.rb, line 43
# Serialises the collected paths to YAML, under a top-level "paths" key,
# and writes them to the imports file inside the Wraith config directory.
# The file is opened in "w+" mode, so any existing content is replaced.
def write_file
  logger.info "Writing to YML file..."

  payload = { 'paths' => @paths }
  destination = "#{wraith.config_dir}/#{wraith.imports}"

  File.open(destination, "w+") do |file|
    file.write(payload.to_yaml)
    logger.info "Spider paths written to #{wraith.imports}"
  end
end