class FeedSearcher::Page

Constants

EXTENSIONS
MIME_TYPES

Attributes

page[R]

Public Class Methods

new(page) click to toggle source
# File lib/feed_searcher/page.rb, line 17
# Stores the page object that the rest of this class inspects.
#
# page - the fetched page. Methods in this class call #body, #uri and
#        #response on it (presumably a Mechanize page -- confirm with callers).
def initialize(page)
  @page = page
end

Public Instance Methods

feed_urls() click to toggle source
# File lib/feed_searcher/page.rb, line 21
# Collects candidate feed URLs for the page.
#
# The page's own URL is listed first when the page passes all three feed
# checks (looks like XML, parses as XML, has a feed root element). The "href"
# of every element returned by +links+ follows.
#
# Returns an Array.
def feed_urls
  is_feed = like_xml? && parsable_as_xml? && has_feed_element?
  own = is_feed ? [url] : []
  own + links.map { |link| link["href"] }
end

Private Instance Methods

body() click to toggle source
# File lib/feed_searcher/page.rb, line 69
# Body of the wrapped page, exactly as +page.body+ returns it.
def body
  source = page
  source.body
end
content_type() click to toggle source
# File lib/feed_searcher/page.rb, line 57
# Looks up the "content-type" entry of the page's response.
#
# Returns whatever +page.response["content-type"]+ yields; #mime_type guards
# against a nil result.
def content_type
  headers = page.response
  headers["content-type"]
end
extension() click to toggle source
# File lib/feed_searcher/page.rb, line 65
# File extension of the page URI's path, without the leading dot.
#
# Returns "" when the path has no extension.
def extension
  suffix = File.extname(page.uri.path)
  suffix.sub(/^\./, "")
end
has_feed_element?() click to toggle source
# File lib/feed_searcher/page.rb, line 41
# Asks +root+ whether the document's top-level element is named feed, RDF or
# rss (namespace prefix ignored via local-name()).
#
# Both the name list and the element name are padded with spaces so that only
# whole names match. Returns whatever +root.xpath+ returns for this
# expression (presumably true/false with Nokogiri -- confirm).
def has_feed_element?
  feed_root_test = "contains(' feed RDF rss ', concat(' ', local-name(/*), ' '))"
  root.xpath(feed_root_test)
end
has_feed_extension?() click to toggle source
# File lib/feed_searcher/page.rb, line 37
# True when the URI's extension (see #extension) is listed in EXTENSIONS.
def has_feed_extension?
  ext = extension
  EXTENSIONS.include?(ext)
end
has_feed_mime_type?() click to toggle source
# File lib/feed_searcher/page.rb, line 33
# True when the response's media type (see #mime_type) is listed in
# MIME_TYPES.
def has_feed_mime_type?
  type = mime_type
  MIME_TYPES.include?(type)
end
has_xml_declaration?() click to toggle source
# File lib/feed_searcher/page.rb, line 29
# True when the body begins with the literal "<?xml" (nothing, not even
# whitespace, may precede it).
def has_xml_declaration?
  body.start_with?("<?xml")
end
html() click to toggle source
# File lib/feed_searcher/page.rb, line 93
# Parses the body with Nokogiri's HTML parser and returns the result.
# Not memoised: every call parses again.
def html
  Nokogiri::HTML(body)
end
like_xml?() click to toggle source
# File lib/feed_searcher/page.rb, line 49
# Cheap pre-check before attempting a strict XML parse: the page "looks like"
# XML if it has an XML declaration, a feed MIME type, or a feed extension.
# Checks run in that order and stop at the first hit.
def like_xml?
  return true if has_xml_declaration?
  return true if has_feed_mime_type?

  has_feed_extension?
end
mime_type() click to toggle source
# File lib/feed_searcher/page.rb, line 61
# Media type portion of the Content-Type header, without its parameters.
#
# Everything from the first ";" onward (e.g. "; charset=utf-8") is dropped and
# surrounding whitespace is trimmed. HTTP permits optional whitespace around
# the ";", so "text/xml ; charset=utf-8" must yield "text/xml" -- a trailing
# space would defeat an exact-match lookup. Letter case is left untouched.
#
# Returns a String, or nil when #content_type returns nil/false.
def mime_type
  raw = content_type # read the header once
  return unless raw

  raw[/\A[^;]*/].strip
end
parsable_as_xml?() click to toggle source
# File lib/feed_searcher/page.rb, line 45
# True when #xml produced a document, false when it returned a falsy value
# (#parse_xml returns false on failure).
def parsable_as_xml?
  xml ? true : false
end
parse_xml() click to toggle source
# File lib/feed_searcher/page.rb, line 97
# Strictly parses the body as XML.
#
# The body comes from a remote server and is untrusted, so the parser is
# configured defensively:
#   * STRICT -- malformed XML raises instead of being silently recovered, which
#     is how a non-XML page is detected.
#   * NONET  -- the parser may not touch the network. Assigning +options+
#     replaces Nokogiri's defaults, so this has to be set explicitly.
#   * NOENT is deliberately NOT set: it turns entity substitution on and
#     exposes the parser to XXE (local file disclosure / SSRF) and
#     entity-expansion attacks.
#
# Returns the parsed document, or false when parsing raises any StandardError
# (best-effort: a page that cannot be parsed is simply "not XML").
def parse_xml
  Nokogiri.XML(body) do |config|
    config.options = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::NONET
  end
rescue
  false
end
root() click to toggle source
# File lib/feed_searcher/page.rb, line 81
# Document to run XPath queries against: the XML document when the body
# parsed as XML, otherwise the body parsed as HTML.
def root
  document = xml
  return document if document

  html
end
types_query() click to toggle source
# File lib/feed_searcher/page.rb, line 77
# XPath predicate fragment matching any MIME_TYPES entry against @type,
# e.g. "@type='a/b' or @type='c/d'".
def types_query
  clauses = MIME_TYPES.map { |mime| "@type='#{mime}'" }
  clauses.join(" or ")
end
url() click to toggle source
# File lib/feed_searcher/page.rb, line 53
# String form of the page's URI.
def url
  location = page.uri
  location.to_s
end
xml() click to toggle source
# File lib/feed_searcher/page.rb, line 85
# Memoised result of #parse_xml.
#
# The cache is keyed on nil rather than truthiness so that a failed parse
# (false) is remembered too and the body is not parsed again.
def xml
  @xml = parse_xml if @xml.nil?
  @xml
end