class Dcs::Debian::Searcher
Constants
- BASE_URL
Attributes
n_limit[RW]
page_limit[RW]
Public Class Methods
new()
click to toggle source
# File lib/dcs/debian/helper.rb, line 11
# Builds a searcher and sets its two crawl limits.
#
# Both limits default to 10, so a bare +new+ behaves exactly as before. The
# keywords let a caller pick other limits at construction time instead of
# assigning the +page_limit+ / +n_limit+ accessors afterwards.
#
# @param page_limit [Integer] stored in @page_limit; #pagination stops once
#   its page counter exceeds this value
# @param n_limit [Integer] stored in @n_limit; #pagination stops requesting
#   further pages once its match counter exceeds this value
def initialize(page_limit: 10, n_limit: 10)
  @page_limit = page_limit
  @n_limit = n_limit
end
Public Instance Methods
extract_entry(node, target)
click to toggle source
# File lib/dcs/debian/helper.rb, line 58
# Converts one search-result node into an entry hash.
#
# The text of the node's "a/code" element is expected to look like
# "<package>debian/<file>:<line>"; the three parts are captured greedily.
#
# @param node [#xpath] a result node (pagination passes the <li> elements of
#   the results list)
# @param target [Object] compared against the captured file name with case
#   equality (===), so a String must match exactly
# @return [Hash] with the keys :path, :url, :package, :file, :line and :pre.
#   :pre holds the children of the node's <pre> element, <br> elements
#   dropped, each prefixed with #indent and joined with newlines. It is an
#   empty String when the node has no <pre>, and reflects the last <pre> when
#   there are several.
# @raise [Error] when the captured file name is not matched by +target+,
#   including when the path does not have the expected shape at all
#   (Error is resolved from the surrounding namespace; it is not defined in
#   this method)
def extract_entry(node, target)
  url = BASE_URL + node.xpath("a").attribute("href").text
  raw = node.xpath("a/code").text
  match = %r{(.+)debian/(.+):(.+)}.match(raw)
  # An unmatched path leaves all three parts nil instead of relying on the
  # $1..$3 regexp globals.
  package, file, line = match ? match.captures : []
  raise Error unless target === file

  entry = {
    path: raw,
    url: url,
    package: package,
    file: file,
    line: line,
    # Always present, so callers can call String methods on it even when the
    # result carries no <pre> snippet.
    pre: ""
  }
  node.xpath("pre").each do |pre|
    kept = pre.children.reject do |child|
      child.kind_of?(Nokogiri::XML::Element) && child.name == "br"
    end
    entry[:pre] = kept.map { |child| indent + child.text }.join("\n")
  end
  entry
end
extract_next_page_uri(node)
click to toggle source
# File lib/dcs/debian/helper.rb, line 45
# Finds the absolute URI of the next result page.
#
# Looks at the <a> children of every <div id="pagination"> and keeps those
# whose text is exactly "Next page". When several qualify, the last one in
# document order is used.
#
# @param node [#xpath] the parsed document (the XPath is absolute, so the
#   whole document is searched)
# @return [String, nil] BASE_URL followed by the link's href, or nil when no
#   "Next page" link exists
def extract_next_page_uri(node)
  anchors = node.xpath("//div[@id='pagination']").flat_map do |pager|
    pager.xpath("a").to_a
  end
  candidates = anchors
               .select { |anchor| anchor.text == "Next page" }
               .map { |anchor| BASE_URL + anchor.attribute("href").text }
  candidates.last
end
indent()
click to toggle source
# File lib/dcs/debian/helper.rb, line 92
# Returns the two-space prefix that extract_entry puts in front of every
# snippet line. A new String is built on each call.
def indent
  "".ljust(2)
end
pagination(target, keyword) { |entry| ... }
click to toggle source
# File lib/dcs/debian/helper.rb, line 16
# Walks the search result pages for +keyword+ and yields every entry whose
# snippet contains the keyword.
#
# The first request goes to BASE_URL/search with the query
# "<keyword> path:debian/<target>$". Following pages come from
# #extract_next_page_uri. The loop ends when there is no next page, when the
# page counter exceeds @page_limit, or when the match counter exceeds
# @n_limit.
#
# NOTE(review): both limits are only checked between pages, so a single page
# may yield more than @n_limit entries. That matches the previous behaviour;
# confirm whether a hard cap was intended.
#
# @param target [String] file name below debian/ to restrict the search to;
#   also passed to #extract_entry
# @param keyword [String] search term; form-encoded for the request and used
#   verbatim for the substring filter on entry[:pre]
# @yieldparam entry [Hash] an entry built by #extract_entry
# @return [nil, Enumerator] nil after iterating; an Enumerator when called
#   without a block
# @raise whatever #extract_entry raises for a result it rejects; such an
#   error ends the whole walk
def pagination(target, keyword)
  return enum_for(:pagination, target, keyword) unless block_given?

  # Form-encode both parts so spaces, "+", "|" and similar characters cannot
  # corrupt the query string or produce an invalid URI.
  next_uri = sprintf("%s/search?q=%s+path%%3Adebian%%2F%s%%24",
                     BASE_URL,
                     URI.encode_www_form_component(keyword),
                     URI.encode_www_form_component(target))
  page = 1
  n = 1
  until next_uri.nil? || n > @n_limit || page > @page_limit
    # Kernel#open stopped handling URLs in Ruby 3.0; URI.open (open-uri) is
    # the supported entry point, and the block form closes the handle.
    html = URI.open(next_uri, "r:utf-8", &:read)
    doc = Nokogiri.parse(html)
    doc.xpath("//ul[@id='results']/li").each do |li|
      entry = extract_entry(li, target)
      next if entry.empty?
      # to_s: an entry without a snippet has no :pre value to search in.
      next unless entry[:pre].to_s.include?(keyword)

      n += 1
      yield(entry)
    end
    next_uri = extract_next_page_uri(doc)
    page += 1
  end
end