class Kudzu::Agent::UrlExtractor::ForXML

Public Class Methods

new(config) click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 109
# Stores the given configuration object on the instance.
#
# @param config [Object] configuration handed in by the caller; kept as-is
#   in @config (none of the methods visible here read it back)
def initialize(config)
  @config = config
end

Public Instance Methods

extract(response) click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 113
# Collects link references from a parsed XML feed response.
#
# Works on a copy of the parsed document so that stripping namespaces does
# not touch the document held by the response. RSS references come first,
# followed by Atom references; entries without a usable url are dropped.
#
# @param response [#parsed_doc] response whose parsed_doc is the XML document
# @return [Array] references that have a non-nil, non-empty url
def extract(response)
  feed = response.parsed_doc.dup.tap(&:remove_namespaces!)

  candidates = from_rss(feed)
  candidates = candidates + from_atom(feed)
  candidates.select { |candidate| !candidate.url.nil? && !candidate.url.empty? }
end

Private Instance Methods

from_atom(doc) click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 130
# Builds one Reference per Atom <entry>.
#
# An entry may carry several <link> elements (alternate, replies, enclosure,
# ...). Stringifying the whole set of href attributes would glue all of them
# into one bogus URL, so a single href is chosen: the alternate link (rel
# absent or rel="alternate") when there is one, otherwise the first link
# that has an href. An entry without any href yields an empty url.
#
# @param doc [#xpath] parsed feed document (extract passes it with namespaces removed)
# @return [Array<Reference>] one reference per entry, in document order
def from_atom(doc)
  doc.xpath('feed/entry').map do |entry|
    href = entry.at_xpath('./link[@href][not(@rel) or @rel="alternate"]/@href') ||
           entry.at_xpath('./link[@href]/@href')
    # nil.to_s is "", which keeps the "no link" case an empty string as before.
    Reference.new(url: href.to_s,
                  title: entry.xpath('./title').inner_text)
  end
end
from_rss(doc) click to toggle source
# File lib/kudzu/agent/url_extractor.rb, line 123
# Builds one Reference per RSS <item>.
#
# Items are visited individually: reading ./item/link from the channel node
# would return the text of every item joined together, producing a single
# reference with an unusable url and title for any feed with more than one
# item.
#
# @param doc [#xpath] parsed feed document (extract passes it with namespaces removed)
# @return [Array<Reference>] one reference per item, in document order;
#   url is empty when the item has no <link> text
def from_rss(doc)
  doc.xpath('rss/channel/item').map do |item|
    Reference.new(url: item.xpath('./link').inner_text,
                  title: item.xpath('./title').inner_text)
  end
end