class Embulk::Input::Sitemap::Client
Public Class Methods
new(url, params)
click to toggle source
# File lib/embulk/input/sitemap/client.rb, line 9
#
# Builds a client for a single sitemap (or sitemap index) URL.
#
# @param url [String] address of the sitemap document to fetch
# @param params [Array<Hash>, nil] extra query parameters; each entry is read
#   through its "name" and "value" keys when the query string is built.
#   Optional, so a client can be created from a URL alone (this is how
#   #invoke creates clients for nested sitemaps); nil is treated as "none".
def initialize(url, params = [])
  @url = url
  # Normalise nil to an empty list so #query can always iterate over it.
  @params = params || []
end
Public Instance Methods
invoke()
click to toggle source
# File lib/embulk/input/sitemap/client.rb, line 21
#
# Fetches the document at @url (with the configured query string appended),
# parses it as XML and returns a flat Array of OpenStruct entries, one per
# <url> element. Every <sitemap> element is followed recursively through a
# new client, so a sitemap index expands into the entries of its children.
#
# Each entry always has :loc; :changefreq, :priority and :lastmod are set
# only when the corresponding child element is present.
#
# @return [Array<OpenStruct>] entries of this sitemap and all nested sitemaps
def invoke
  # Required here because the gzip branch below is the only user of Zlib and
  # this block cannot rely on the file's top-level requires providing it.
  require "zlib"

  target = "#{@url}#{query}"
  Embulk.logger.info "GET #{target}"

  # URI#open (from open-uri) is used instead of Kernel#open: it only ever
  # performs a URL fetch, so a string beginning with "|" cannot be executed
  # as a command, and it keeps working on Rubies where Kernel#open no longer
  # accepts URLs.
  response = URI.parse(target).open do |f|
    Embulk.logger.info "Content-Type = #{f.content_type}"
    raw = f.read
    case f.content_type
    when "application/x-gzip", "application/octet-stream"
      # Decompress the body that was already downloaded rather than shelling
      # out to curl/gunzip with an interpolated URL. The gzip magic bytes are
      # checked because "application/octet-stream" is also used for plain,
      # uncompressed payloads.
      if raw.getbyte(0) == 0x1f && raw.getbyte(1) == 0x8b
        Zlib::GzipReader.new(StringIO.new(raw)).read
      else
        raw
      end
    else
      raw
    end
  end

  document = Nokogiri::XML(response)
  sitemaps = document.css("sitemap")
  urls = document.css("url")
  Embulk.logger.info "Find #{sitemaps.length} sitemaps, #{urls.length} urls"

  items = []

  sitemaps.each do |sitemap|
    child_url = sitemap.css("loc").first.text.to_s
    # The constructor takes (url, params). Nested sitemaps are fetched by
    # their own <loc> with no extra parameters.
    # NOTE(review): assumes the parent's params should not be inherited by
    # child sitemaps - confirm against the plugin's intended behaviour.
    items.concat(self.class.new(child_url, []).invoke)
  end

  urls.each do |url|
    item = { loc: url.css("loc").first.text.to_s }
    # Optional elements are copied only when present in the <url> entry.
    %i[changefreq priority lastmod].each do |field|
      node = url.css(field.to_s).first
      item[field] = node.text.to_s if node
    end
    items << OpenStruct.new(item)
  end

  items
end
query()
click to toggle source
# File lib/embulk/input/sitemap/client.rb, line 14
#
# Renders @params as a query-string suffix for @url.
#
# Each parameter contributes "name=value" (taken from its "name" and "value"
# keys, inserted verbatim), joined with "&". The suffix starts with "?" when
# @url has no "?" yet and with "&" otherwise.
#
# @return [String] the suffix to append to @url, or "" when there are no params
def query
  pairs = @params.map { |param| "#{param["name"]}=#{param["value"]}" }
  return "" if pairs.empty?

  separator = @url.include?("?") ? "&" : "?"
  separator + pairs.join("&")
end