class VagueScraper::Scraper
Public Class Methods
attr_accessor(*attrs)
click to toggle source
Calls superclass method
# File lib/vague_scraper/scraper.rb, line 28
# Declares accessors while remembering their names.
#
# Every name passed in is appended to the receiver's own @attrs list
# (created on first use), and the call is then forwarded unchanged to the
# superclass implementation of attr_accessor.
#
# NOTE: the list lives in an instance variable of the receiver itself, so a
# receiver that has not declared anything yet starts from an empty list.
#
# @param attrs [Array<Symbol, String>] accessor names to declare
# @return whatever the superclass attr_accessor returns
def attr_accessor(*attrs)
  (@attrs ||= []).concat(attrs)
  super
end
attrs()
click to toggle source
# File lib/vague_scraper/scraper.rb, line 34
# Returns the attribute names recorded for this class.
#
# Looks at the receiver's own @attrs first and then walks up the superclass
# chain, returning the first list it finds. When nothing in the chain has
# recorded any attributes an empty array is returned, so callers can iterate
# the result without a nil check.
#
# @return [Array] the nearest recorded attribute list, or [] when none exists
def attrs
  klass = self
  while klass
    # Read the class-level ivar directly instead of evaluating a string.
    declared = klass.instance_variable_get(:@attrs)
    return declared if declared

    klass = klass.superclass
  end
  []
end
concurrency(value)
click to toggle source
# File lib/vague_scraper/scraper.rb, line 38
# Records the concurrency setting on the receiver.
#
# The value is stored in @concurrency; execute passes it to Parallel as
# in_threads: and falls back to 1 when it has not been set.
#
# @param value the setting to store
# @return the value that was stored
def concurrency(value)
  @concurrency = value
end
execute(args)
click to toggle source
# File lib/vague_scraper/scraper.rb, line 7
# Scrapes every URL in args[:urls] and returns the mapped results.
#
# For each URL a progress line is printed, a fresh scraper instance is
# created, the URL is handed to VagueScraper::Driver.call (presumably
# returning the page HTML - confirm against the driver), and the outcome is
# passed to the instance's parser. The instance's result hash is merged into
# { url: url }, given to handler, and becomes the value for that URL.
#
# Work is distributed with Parallel.map_with_index using in_threads:, taken
# from @concurrency and defaulting to 1.
#
# NOTE: the error message mentions raw_htmls:, but this method only reads
# args[:urls].
#
# @param args [Hash] options; :urls holds the collection of URLs to scrape
# @return the value of Parallel.map_with_index (presumably one hash per URL)
# @raise [RuntimeError] when args[:urls] is nil
def execute(args)
  urls = args[:urls]
  raise 'You need pass an argument urls: or raw_htmls:' if urls.nil?

  Parallel.map_with_index(urls, in_threads: @concurrency || 1) do |url, id|
    puts "\e[34m[#{id + 1}] scraping: #{url}\e[0m"
    scraper = new
    html = VagueScraper::Driver.call(url)
    scraper.parser(html)
    { url: url }.merge(scraper.result).tap do |result_hash|
      scraper.handler(result_hash)
    end
  end
end
Public Instance Methods
handler(result)
click to toggle source
# File lib/vague_scraper/scraper.rb, line 48
# Default handling for a scraped result: prints its inspect form with Kernel#p.
#
# execute calls this once per URL with the merged result hash.
#
# @param result the value to print
# @return the same value that was passed in (Kernel#p returns its argument)
def handler(result)
  p result
end
parser(html)
click to toggle source
# File lib/vague_scraper/scraper.rb, line 44
# Placeholder that always raises, naming the receiver's class in the message.
#
# execute calls this with the value obtained from VagueScraper::Driver.call,
# so a usable scraper needs its own definition of this method.
#
# @param html the value handed over by execute; unused here
# @raise [RuntimeError] always
def parser(html)
  raise "You need to define #{self.class}#parser"
end
result()
click to toggle source
# File lib/vague_scraper/scraper.rb, line 52
# Collects the current value of every recorded attribute into a hash.
#
# The attribute names come from self.class.attrs; each name is sent to the
# instance and the returned value is stored under that name. A nil attribute
# list is treated as empty, so an instance whose class recorded no attributes
# yields {} instead of raising.
#
# @return [Hash] attribute name => current value, in declaration order
def result
  Array(self.class.attrs).each_with_object({}) do |name, values|
    values[name] = send(name)
  end
end