class Tjcrawler::Parser
Public Class Methods
new() { |doc| ... }
click to toggle source
A Nokogiri document will be yielded to the block.
# File lib/tjcrawler/parser.rb, line 7
# Builds a parser around a caller-supplied parsing strategy.
#
# The block is stored in @strategy; #parse later invokes it with the
# Nokogiri document built from the page content.
#
# Raises ArgumentError ('Block required') when called without a block.
# The previous code tried to `yield` the message in exactly the case where
# no block exists, which could only ever fail with a LocalJumpError.
def initialize(&block)
  raise ArgumentError, 'Block required' unless block

  @strategy = block
end
Public Instance Methods
parse(content)
click to toggle source
# File lib/tjcrawler/parser.rb, line 12
# Parses +content+ as HTML with Nokogiri and hands the resulting document
# to the strategy block stored in @strategy.
#
# The strategy call is wrapped in @@semaphore.synchronize (presumably a
# class-wide Mutex defined elsewhere -- confirm), so only the strategy
# invocation is serialized, not the HTML parsing itself.
#
# Prints a single "." after each document and returns whatever the
# strategy returned.
def parse(content)
  document = Nokogiri::HTML(content)
  outcome = nil
  @@semaphore.synchronize do
    outcome = @strategy.call(document)
  end
  print :'.'
  outcome
end
start()
click to toggle source
# File lib/tjcrawler/parser.rb, line 20
# Runs the parse loop indefinitely.
#
# Each iteration asks find_next for a page. When none is available it
# sleeps for one second and polls again; otherwise it parses the page's
# content and then calls page.touch(:parsed_at) (presumably stamping the
# record as parsed -- confirm against the Page model).
def start
  loop do
    page = find_next
    unless page
      # Nothing to do yet: back off briefly before polling again.
      sleep 1
      next
    end

    parse(page.content)
    page.touch(:parsed_at)
  end
end
Private Instance Methods
find_next()
click to toggle source
# File lib/tjcrawler/parser.rb, line 30
# Returns the next Page that should be parsed, or whatever `first` yields
# when nothing matches (presumably nil -- confirm).
#
# A page qualifies when it has been crawled and has either never been
# parsed or was last parsed more than one day ago. The ORDER BY puts
# `parsed_at IS NOT NULL` first, which on databases that sort false before
# true lists never-parsed pages ahead of the rest, then oldest parsed_at
# (NOTE(review): ordering of the boolean expression is database-specific).
def find_next
  cutoff = 1.day.ago
  candidates = Page.where(
    'crawled_at IS NOT NULL AND (parsed_at IS NULL OR parsed_at < ?)',
    cutoff
  )
  candidates.order('parsed_at IS NOT NULL, parsed_at').first
end