class SitemapReader
Parses an XML sitemap, read from a local file or fetched from a URL, into a list of URL entries.
Example:
>> sm = SitemapReader.new('http://example.com/sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]
… or read from file like this:
>> sm = SitemapReader.new('./sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18"},{:loc=>"http://example.com/page2", :lastmod=>nil}]
Public Class Methods
new(file_or_url)
click to toggle source
Arguments:
file_or_url: (String)
# File lib/sitemap_reader.rb, line 17
# Opens the sitemap at +file_or_url+ via get_sitemap and parses it with
# Nokogiri::XML, keeping the parsed document in @doc.
#
# file_or_url:: (String) handed unchanged to get_sitemap.
#
# The object returned by get_sitemap is closed once parsing is done (or has
# failed) so that the underlying handle is not left open.
def initialize(file_or_url)
  source = get_sitemap(file_or_url)
  @doc = Nokogiri::XML(source)
ensure
  # source is nil when get_sitemap itself raised; otherwise release it.
  source.close if source.respond_to?(:close)
end
Public Instance Methods
get_sitemap(file_or_url)
click to toggle source
# File lib/sitemap_reader.rb, line 32
# Opens the sitemap source and returns it as an open, readable object.
#
# file_or_url:: (String) if a file exists at this path it is opened with
#               File.open; anything else is handed to open-uri.
#
# The caller is responsible for closing the returned object.
def get_sitemap(file_or_url)
  return File.open(file_or_url) if File.exist?(file_or_url)

  # Loaded lazily: open-uri is only needed when the argument is not a file.
  require 'open-uri'
  # Kernel#open no longer fetches URLs as of Ruby 3.0; URI.open is the
  # open-uri entry point that does.
  URI.open(file_or_url)
end
get_urls()
click to toggle source
# File lib/sitemap_reader.rb, line 21
# Builds one Hash per +url+ element found in @doc.
#
# Each Hash has the keys :loc, :lastmod, :changefreq and :priority. :loc is
# the text content of the first +loc+ child; the other three are produced by
# url_lastmod, url_changefreq and url_priority from the first matching child
# (or nil when there is none).
#
# An entry without a +loc+ child gets <tt>loc: nil</tt> rather than raising
# NoMethodError, so a single malformed entry does not abort the whole list.
def get_urls
  @doc.css('url').map do |u|
    loc = u.css('loc').first
    {
      loc: loc&.content,
      lastmod: url_lastmod(u.css('lastmod').first),
      changefreq: url_changefreq(u.css('changefreq').first),
      priority: url_priority(u.css('priority').first)
    }
  end
end
url_changefreq(changefreq)
click to toggle source
# File lib/sitemap_reader.rb, line 41
# Returns the text content of +changefreq+, or nil when +changefreq+ is nil.
#
# changefreq:: a node responding to +content+, or nil.
def url_changefreq(changefreq)
  changefreq&.content
end
url_lastmod(lastmod)
click to toggle source
# File lib/sitemap_reader.rb, line 49
# Parses the text content of +lastmod+ with W3cDatetime.parse.
#
# lastmod:: a node responding to +content+, or nil.
#
# Returns whatever W3cDatetime.parse returns, or nil when +lastmod+ is nil
# or when parsing raises ArgumentError.
def url_lastmod(lastmod)
  W3cDatetime.parse(lastmod.content) unless lastmod.nil?
rescue ArgumentError
  # Best effort: an unparsable value is reported as "no lastmod".
  nil
end
url_priority(priority)
click to toggle source
# File lib/sitemap_reader.rb, line 45
# Converts the text content of +priority+ to a Float.
#
# priority:: a node responding to +content+, or nil.
#
# Returns nil when +priority+ is nil or when its content is not a valid
# number. Strict Float() is used instead of String#to_f so that malformed or
# empty content is not silently reported as 0.0.
def url_priority(priority)
  return if priority.nil?

  Float(priority.content)
rescue ArgumentError, TypeError
  # Unparsable content is treated the same as a missing element.
  nil
end