module Horsefield::Scraper
Public Class Methods
included(base)
click to toggle source
# File lib/horsefield/scraper.rb, line 14
#
# Mixin hook: extends the receiver of the include with ClassMethods so the
# methods of that module become callable on +base+ itself.
#
# @param base [Module] the class or module that included this module
# NOTE(review): ClassMethods is defined outside this excerpt — presumably it
# provides the class-level `lookups` / `postprocessor` readers; confirm.
def self.included(base)
  base.extend ClassMethods
end
new(html_xml_or_url, remove_namespaces: false)
click to toggle source
# File lib/horsefield/scraper.rb, line 18
#
# Builds the Nokogiri document that the scraper works on.
#
# When the argument as a whole looks like a URI, it is fetched first and the
# response body is used as the markup. Markup beginning with an XML
# declaration (`<?xml`) is parsed as XML; everything else is parsed as HTML.
#
# @param html_xml_or_url [String] raw HTML, raw XML, or a URL to fetch
# @param remove_namespaces [Boolean] when true, strip namespaces from a
#   parsed XML document (not applied to HTML input)
def initialize(html_xml_or_url, remove_namespaces: false)
  # URI.regexp is obsolete; the parser's make_regexp is the same pattern.
  # \Z (rather than \z) keeps tolerating one trailing newline, as before.
  url_pattern = /\A#{URI::DEFAULT_PARSER.make_regexp}\Z/

  if html_xml_or_url =~ url_pattern
    # URI.open instead of bare Kernel#open: open-uri's Kernel#open override
    # was deprecated in Ruby 2.7 and removed in 3.0, and Kernel#open would
    # also treat "|cmd" strings as commands. Requires 'open-uri', exactly as
    # the previous open(url) call did.
    html_xml_or_url = URI.open(html_xml_or_url).read
  end

  @doc =
    if html_xml_or_url =~ /\A<\?xml/
      doc = Nokogiri::XML(html_xml_or_url)
      doc = doc.remove_namespaces! if remove_namespaces
      doc
    else
      Nokogiri::HTML(html_xml_or_url)
    end
end
scrape(html_or_url, &block)
click to toggle source
# File lib/horsefield/scraper.rb, line 7
#
# One-shot convenience entry point: creates an anonymous class that includes
# Horsefield::Scraper, evaluates the given block against that class (so the
# block runs with the class as +self+), then instantiates it with the input
# and returns the result of the instance's #scrape.
#
# @param html_or_url [String] passed straight to the scraper's constructor
# @yield definition block, evaluated via instance_eval on the anonymous class
# @return whatever the instance's #scrape returns
def self.scrape(html_or_url, &block)
  scraper_class = Class.new do
    include Horsefield::Scraper
    instance_eval(&block)
  end

  scraper_class.new(html_or_url).scrape
end
Public Instance Methods
[](field)
click to toggle source
# File lib/horsefield/scraper.rb, line 30
#
# Index-style access to a single entry of the scraped result; delegates to
# #[] on the object returned by #fields.
#
# @param field the key to look up in #fields
# @return the value stored under +field+
def [](field)
  scraped = fields
  scraped[field]
end
fields()
click to toggle source
# File lib/horsefield/scraper.rb, line 38
#
# Runs every class-level lookup against the parsed document, merges the
# results into one Hash, and then evaluates the class-level postprocessor in
# the context of that Hash. The outcome is memoized in @fields.
#
# NOTE(review): `lookups` and `postprocessor` come from the including class
# and are not visible here — each lookup must respond to #call(doc) and
# return something Hash#merge accepts; confirm against ClassMethods.
#
# @return the value produced by the postprocessor block
def fields
  @fields ||= begin
    merged = self.class.lookups.reduce({}) do |collected, lookup|
      collected.merge(lookup.call(@doc))
    end
    # The postprocessor runs with the merged Hash as self.
    merged.instance_eval(&self.class.postprocessor)
  end
end
scrape()
click to toggle source
# File lib/horsefield/scraper.rb, line 34
#
# Performs the scrape; simply returns the result of #fields.
#
# @return the same object as #fields
def scrape
  result = fields
  result
end