class CheckSitemap::XMLReader

Public Class Methods

new(filename, options={}) click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 4
# Builds a reader for a sitemap located at +filename+.
#
# filename - local path or URL of the sitemap; stored as @url_or_filename.
# options  - optional Hash of settings; stored as @options (previously the
#            argument was silently dropped and an empty Hash stored instead).
def initialize(filename, options={})
  @url_or_filename = filename
  @options = options
end

Public Instance Methods

each(&block) click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 17
# Iterates over every URL listed in the parsed document.
#
# Yields the text content of each +loc+ element found via doc.css('loc').
# When called without a block, returns an Enumerator over the same values
# (built from this object rather than from a separate collection).
def each(&block)
  return to_enum(:each) unless block_given?

  doc.css('loc').each { |loc| block.call(loc.content) }
end
sitemap_index?() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 9
# True when the parsed document has at least one +sitemap+ element that is a
# direct child of +sitemapindex+.
def sitemap_index?
  sitemap_nodes = doc.css('sitemapindex > sitemap')
  !sitemap_nodes.empty?
end
urlset?() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 13
# True when the parsed document has at least one +url+ element that is a
# direct child of +urlset+.
def urlset?
  url_nodes = doc.css('urlset > url')
  !url_nodes.empty?
end

Protected Instance Methods

doc() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 27
# Returns the parsed XML document, parsing it on first use and caching the
# result in @doc.
#
# Logs the source being read, then feeds the output of read_xml to
# Nokogiri::XML with the strict option enabled.
#
# Raises CheckSitemap::XMLSyntaxError (carrying the parser's message) when
# Nokogiri reports a syntax error.
def doc
  return @doc if @doc

  CheckSitemap.log("Reading: '#{ @url_or_filename }'")
  @doc = Nokogiri::XML(read_xml, &:strict)
rescue Nokogiri::XML::SyntaxError => syntax_error
  raise CheckSitemap::XMLSyntaxError, syntax_error.message
end
download!() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 59
# Opens the sitemap source and returns a readable IO-like handle.
#
# Remote sources (http, https or ftp URLs) are opened through URI.open;
# everything else is treated as a local path and opened with File.open.
# The two cases are dispatched explicitly instead of going through the bare
# Kernel#open, because Kernel#open spawns a subprocess for strings beginning
# with "|" and no longer opens URLs on Ruby 3.0+.
#
# Raises CheckSitemap::FileNotFound when the local file does not exist.
# Raises CheckSitemap::HTTPNotFound when open-uri reports an HTTP error.
def download!
  CheckSitemap.log("Opening '#{ @url_or_filename }'")

  if @url_or_filename.to_s.match?(%r{\A(?:https?|ftp)://}i)
    URI.open(@url_or_filename)
  else
    File.open(@url_or_filename)
  end
rescue Errno::ENOENT
  raise CheckSitemap::FileNotFound, "Missing filename '#{@url_or_filename}'"
rescue OpenURI::HTTPError
  raise CheckSitemap::HTTPNotFound, "HTTP request to '#{@url_or_filename}' failed"
end
gzip?() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 51
# True when the extension of @url_or_filename, as reported by File.extname,
# is exactly ".gz".
def gzip?
  extension = File.extname(@url_or_filename)
  extension == '.gz'
end
raw_file() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 47
# Returns the handle produced by download!, invoking download! only on the
# first call and caching the result in @raw_file.
def raw_file
  return @raw_file if @raw_file

  @raw_file = download!
end
read_xml() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 36
# Returns the sitemap's XML text, decompressing it first when gzip? is true.
#
# The text is cached in @xml_file, so repeated calls return the same content
# instead of re-reading the already-consumed handle from raw_file.
#
# Raises CheckSitemap::InvalidContentType when the source is neither gzip?
# nor xml? (or when reading yields nothing).
def read_xml
  @xml_file ||= begin
    content =
      if gzip?
        Zlib::GzipReader.new(raw_file).read
      elsif xml?
        raw_file.read
      end

    content || raise(
      CheckSitemap::InvalidContentType,
      "File format or MIME type is not supported: '#{mime_type(raw_file)}'"
    )
  end
end
xml?() click to toggle source
# File lib/check_sitemap/xml_reader.rb, line 55
# True when the extension of @url_or_filename, as reported by File.extname,
# is exactly ".xml".
def xml?
  extension = File.extname(@url_or_filename)
  extension == '.xml'
end