class Sitemaped

Constants

VERSION

Public Class Methods

new(url) click to toggle source
# File lib/sitemaped.rb, line 10
def initialize(url)
  @url = URI.parse(URI.encode(url))
  raise URI::InvalidURIError.new('scheme or host missing') unless @url.scheme and @url.host
  @sitemap = Set.new
end

Public Instance Methods

include?(path) click to toggle source
# File lib/sitemaped.rb, line 20
def include?(path)
  sitemaps.include?(path)
end
sitemap() click to toggle source
# File lib/sitemaped.rb, line 16
def sitemap
  sitemaps.to_a
end

Private Instance Methods

default_sitemap() click to toggle source
# File lib/sitemaped.rb, line 40
def default_sitemap
  @default_sitemap_data ||= parse_sitemap(load_sitemap(URI.join(@url, "sitemap.xml")) || load_sitemap(URI.join(@url, "sitemap.xml.gz")))
end
handle_nested_sitemaps(sitemap_list=[]) click to toggle source
# File lib/sitemaped.rb, line 44
def handle_nested_sitemaps(sitemap_list=[])
  return sitemap_list.map do |sitemap_url|
    load_sitemap(sitemap_url)
  end.compact.map do |sitemap_io|
    parse_sitemap(sitemap_io)
  end.compact.flatten
end
load_sitemap(url=nil) click to toggle source
# File lib/sitemaped.rb, line 52
def load_sitemap(url=nil)
  sitemap_io = open(url)
rescue
  return nil
else
  begin
    return Zlib::GzipReader.new(sitemap_io)
  rescue
    sitemap_io.rewind
    return sitemap_io
  end
end
parse_sitemap(sitemap_io) click to toggle source
# File lib/sitemaped.rb, line 65
def parse_sitemap(sitemap_io)
  sitemap_data = Nokogiri::HTML(sitemap_io)
  if nested_sitemaps = sitemap_data.xpath("//sitemapindex/sitemap/loc") and !nested_sitemaps.empty?
    return handle_nested_sitemaps(nested_sitemaps.map(&:text).flatten)
  end

  return sitemap_data.xpath("//urlset/url/loc").map(&:text).flatten
rescue
  nil
end
robots_sitemap() click to toggle source
# File lib/sitemaped.rb, line 30
def robots_sitemap
  @robots_sitemap_urls ||= open(URI.join(@url, "robots.txt")).read.scan(/\s*sitemap:\s*([^\r\n]+)\s*$/i).flatten!
rescue
  @robots_sitemap_urls = []
ensure
  unless @robots_sitemap_urls.empty?
    return @robots_sitemap_data ||= handle_nested_sitemaps(@robots_sitemap_urls)
  end
end
sitemaps() click to toggle source
# File lib/sitemaped.rb, line 26
def sitemaps
  return @sitemap.empty? ? @sitemap.merge(robots_sitemap || default_sitemap) : @sitemap
end