class Extractor
Attributes
doc[RW]
Public Instance Methods
extract_image(xpath)
click to toggle source
# File lib/feed_ninja/extractor.rb, line 23
# Evaluates +xpath+ against @doc and resolves every match against @base_uri.
#
# Each match is handed to URI.join as-is, so it must be a String, a URI, or
# something implicitly convertible to a String.
# NOTE(review): presumably the xpath selects href/src attribute nodes — confirm
# against callers.
#
# @param xpath [String] XPath expression evaluated on @doc
# @return [Array<URI>] one URI per match, in match order
def extract_image(xpath)
  @doc.xpath(xpath).map { |picture_href| URI.join(@base_uri, picture_href) }
end
extract_images(xpaths)
click to toggle source
# File lib/feed_ninja/extractor.rb, line 15
# Runs extract_image for each given XPath expression and returns all results
# as a single flat list.
#
# @param xpaths [String, Array<String>, nil] one expression or a list of them;
#   a single value is wrapped in an array and nil yields an empty result
# @return [Array] concatenated extract_image results, in the order of +xpaths+
def extract_images(xpaths)
  LOGGER.debug { "collecting images for #{xpaths}" }
  Array(xpaths).flat_map do |xpath|
    LOGGER.debug { "collecting image:xpath #{xpath}" }
    extract_image(xpath)
  end
end
extract_xml(xpaths)
click to toggle source
# File lib/feed_ninja/extractor.rb, line 29
# Evaluates each given XPath expression against @doc and returns the string
# form (+to_s+) of every match as a single flat list.
#
# @param xpaths [String, Array<String>, nil] one expression or a list of them;
#   a single value is wrapped in an array and nil yields an empty result
# @return [Array<String>] +to_s+ of every match, grouped in the order of +xpaths+
def extract_xml(xpaths)
  LOGGER.debug { "collecting text" }
  Array(xpaths).flat_map do |xpath|
    LOGGER.debug { "collecting text:xpath #{xpath}" }
    @doc.xpath(xpath).map do |result|
      LOGGER.debug { "collecting text:result #{result}" }
      result.to_s
    end
  end
end
fetch(uri)
click to toggle source
# File lib/feed_ninja/extractor.rb, line 7
# Downloads +uri+, parses the response with Nokogiri::HTML, and stores the
# parsed document in @doc and the response's base URI in @base_uri.
#
# The URI object's own +open+ (provided by open-uri) is used instead of
# Kernel#open: opening URIs through Kernel#open was deprecated in Ruby 2.7
# and removed in Ruby 3.0.
#
# @param uri [String, URI] address of the page to fetch
# @return the value of the block, i.e. the base URI assigned to @base_uri
def fetch(uri)
  URI(uri).open do |site|
    @doc = Nokogiri::HTML(site)
    @base_uri = site.base_uri
  end
end