class SitemapReader

Parses an XML sitemap, loaded from a URL or from a local file, and returns its URL entries.

Example:

>> sm = SitemapReader.new('http://example.com/sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]

… or read from a local file like this:

>> sm = SitemapReader.new('./sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]

Public Class Methods

new(file_or_url) click to toggle source

Arguments:

file_or_url: (String) path to a local sitemap file, or the URL of a remote sitemap
# File lib/sitemap_reader.rb, line 17
# Loads the sitemap and parses it into an XML document kept in @doc.
#
# @param file_or_url [String] path of a local sitemap file, or the URL of a
#   remote one; it is passed unchanged to get_sitemap
def initialize(file_or_url)
  source = get_sitemap(file_or_url)
  @doc = Nokogiri::XML(source)
ensure
  # get_sitemap returns an open IO-like object. Release it once parsing has
  # finished (or failed) instead of leaving the handle open until GC.
  # `source` is nil here when get_sitemap itself raised.
  source.close if source.respond_to?(:close)
end

Public Instance Methods

get_sitemap(file_or_url) click to toggle source
# File lib/sitemap_reader.rb, line 32
# Opens the sitemap source for reading.
#
# @param file_or_url [String] path of an existing local file, or a URL
# @return [File, Object] an open File when the argument names an existing
#   file; otherwise whatever open-uri's URI.open returns for the argument
# @raise [Errno::ENOENT] when the argument is neither an existing file nor a
#   URL that open-uri can open
def get_sitemap(file_or_url)
  return File.open(file_or_url) if File.exist?(file_or_url)

  # Loaded only when the argument is not a local file.
  require 'open-uri'
  # A bare `open(url)` relied on open-uri patching Kernel#open, which no
  # longer handles URLs as of Ruby 3.0. URI.open is the supported entry point.
  # NOTE(review): URI.open still defers to Kernel#open for strings that do not
  # look like URLs, so do not pass untrusted input here.
  URI.open(file_or_url)
end
get_urls() click to toggle source
# File lib/sitemap_reader.rb, line 21
  # Lists every <url> entry found in the parsed sitemap document (@doc).
  #
  # @return [Array<Hash>] one Hash per <url> element with the keys :loc,
  #   :lastmod, :changefreq and :priority. :loc is the text of the first
  #   <loc> child (nil when there is none); the other values are whatever
  #   url_lastmod, url_changefreq and url_priority return for the first
  #   matching child node (or for nil when the child is absent).
  def get_urls
    @doc.css('url').map do |u|
      loc = u.at_css('loc')
      {
        # A <url> without a <loc> child yields nil here instead of raising
        # NoMethodError, matching how the optional fields below behave.
        loc: (loc.content if loc),
        lastmod: url_lastmod(u.at_css('lastmod')),
        changefreq: url_changefreq(u.at_css('changefreq')),
        priority: url_priority(u.at_css('priority'))
      }
    end
  end
url_changefreq(changefreq) click to toggle source
# File lib/sitemap_reader.rb, line 41
# Reads the content of a change-frequency node.
#
# @param changefreq [#content, nil] the node, or nil when there is none
# @return [Object, nil] the node's content, or nil when given nil
def url_changefreq(changefreq)
  return nil if changefreq.nil?

  changefreq.content
end
url_lastmod(lastmod) click to toggle source
# File lib/sitemap_reader.rb, line 49
# Parses the content of a last-modification node with W3cDatetime.parse.
#
# @param lastmod [#content, nil] the node, or nil when there is none
# @return [Object, nil] the value returned by W3cDatetime.parse; nil when
#   given nil or when an ArgumentError is raised while parsing
def url_lastmod(lastmod)
  return nil if lastmod.nil?

  W3cDatetime.parse(lastmod.content)
rescue ArgumentError
  # An ArgumentError from parsing is swallowed on purpose: the entry is
  # reported without a lastmod value.
  nil
end
url_priority(priority) click to toggle source
# File lib/sitemap_reader.rb, line 45
# Converts the content of a priority node to a Float.
#
# @param priority [#content, nil] the node, or nil when there is none
# @return [Float, nil] content.to_f of the node (to_f yields 0.0 for
#   non-numeric text), or nil when given nil
def url_priority(priority)
  return nil if priority.nil?

  priority.content.to_f
end