class Kudzu::Agent::UrlExtractor::ForXML
Public Class Methods
new(config)
click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 109
# Builds an extractor that keeps hold of the supplied configuration.
#
# @param config [Object] stored unchanged in @config; none of the methods
#   visible in this section read it back
def initialize(config)
  @config = config
end
Public Instance Methods
extract(response)
click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 113
# Gathers link references from a feed response, trying both the RSS and the
# Atom layout and keeping only references that carry a non-empty URL.
#
# @param response [#parsed_doc] response whose parsed document is searched
# @return [Array] references from from_rss followed by those from from_atom,
#   minus any whose url is nil or empty
def extract(response)
  # Operate on a copy; presumably so that stripping namespaces does not
  # alter the document held by the response itself.
  xml = response.parsed_doc.dup
  xml.remove_namespaces!

  rss_refs = from_rss(xml)
  atom_refs = from_atom(xml)

  (rss_refs + atom_refs).select { |ref| !ref.url.nil? && !ref.url.empty? }
end
Private Instance Methods
from_atom(doc)
click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 130
# Builds one Reference per Atom <entry> found under the <feed> root.
#
# An entry may carry several <link> elements (alternate, self, enclosure, ...).
# Stringifying the whole set of href attributes would glue them together into
# a single invalid URL, so exactly one href is chosen: the alternate link
# (no rel attribute, or rel="alternate") when present, otherwise the first
# link that has an href.
#
# @param doc [#xpath] namespace-free feed document
# @return [Array<Reference>] one reference per entry; url is "" when the
#   entry has no link with an href
def from_atom(doc)
  doc.xpath('feed/entry').map do |entry|
    hrefs = entry.xpath('./link[@href][not(@rel) or @rel="alternate"]/@href')
    hrefs = entry.xpath('./link[@href]/@href') if hrefs.empty?

    # first is nil for an empty set; nil.to_s keeps the url an empty string.
    Reference.new(url: hrefs.first.to_s,
                  title: entry.xpath('./title').inner_text)
  end
end
from_rss(doc)
click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 123
# Builds one Reference per RSS <item> found under rss/channel.
#
# Iteration is per item rather than per channel: reading ./item/link from the
# channel node would concatenate the link (and title) text of every item into
# a single Reference, producing an unusable URL for any feed with more than
# one item.
#
# @param doc [#xpath] namespace-free feed document
# @return [Array<Reference>] one reference per item; url is "" when the item
#   has no <link> child
def from_rss(doc)
  doc.xpath('rss/channel/item').map do |item|
    Reference.new(url: item.xpath('./link').inner_text,
                  title: item.xpath('./title').inner_text)
  end
end