class Wraith::Spider
Constants
- EXT
Attributes
wraith[R]
Public Class Methods
new(config)
click to toggle source
# File lib/wraith/spider.rb, line 18
# Sets up a spider for the given configuration.
#
# Builds a Wraith::Wraith from +config+ with +imports_must_resolve+ turned
# off, and starts with an empty label => path table.
#
# @param config passed straight through to Wraith::Wraith.new
def initialize(config)
  wraith_options = { imports_must_resolve: false }
  @wraith = Wraith::Wraith.new(config, wraith_options)
  @paths = {}
end
Public Instance Methods
add_path(path)
click to toggle source
# File lib/wraith/spider.rb, line 39
# Records +path+ in the paths table under a label derived from it.
#
# The root path "/" is labelled "home". Any other path is labelled by
# replacing each "/" with "__", dropping one trailing "__" and downcasing
# (so "/About/Us/" becomes "__about__us"). The stored value is the
# downcased path.
#
# @param path [String] URL path to record
# @return [String] the downcased path that was stored
def add_path(path)
  label =
    if path == "/"
      "home"
    else
      path.gsub("/", "__").chomp("__").downcase
    end

  @paths[label] = path.downcase
end
crawl()
click to toggle source
# File lib/wraith/spider.rb, line 23
# Crawls the configured base domain with Anemone, registering the path of
# every page visited via add_path, then calls write_file.
def crawl
  logger.info "Crawling #{wraith.base_domain}"

  Anemone.crawl(wraith.base_domain) do |anemone|
    # Pattern for links ending in one of the EXT extensions.
    skipped_extensions = /\.(#{EXT.join('|')})$/
    anemone.skip_links_like(skipped_extensions)

    # Add user specified skips
    anemone.skip_links_like(wraith.spider_skips)

    anemone.on_every_page do |page|
      visited_path = page.url.path
      logger.info " #{visited_path}"
      add_path(visited_path)
    end
  end

  logger.info "Crawl complete."
  write_file
end
write_file()
click to toggle source
# File lib/wraith/spider.rb, line 43
# Writes the collected paths as YAML, under a top-level "paths" key, to the
# imports file inside the config directory. The file is opened with mode
# "w+", so any existing content is truncated.
def write_file
  logger.info "Writing to YML file..."

  config = { 'paths' => @paths }
  destination = "#{wraith.config_dir}/#{wraith.imports}"

  File.open(destination, "w+") do |file|
    file.write(config.to_yaml)
    logger.info "Spider paths written to #{wraith.imports}"
  end
end