class SitemapMaker::SiteTree::Tree
Public Class Methods
new( url )
click to toggle source
# File lib/SitemapMaker/site_tree.rb, line 22
# Loads the document at +url+ and remembers the address both as the raw
# string and as a parsed URI.
#
# @param url [String] address of the page to load
def initialize(url)
  # URI.open rather than Kernel#open: open-uri no longer makes Kernel#open
  # accept URLs as of Ruby 3.0. The block form closes the download handle
  # as soon as Nokogiri has parsed it.
  @page = URI.open(url) { |io| Nokogiri::HTML(io) }
  @url = url
  @uri = URI.parse(url)
end
Public Instance Methods
own_links()
click to toggle source
Returns the links on the page that point to the page's own host.
# File lib/SitemapMaker/site_tree.rb, line 29
# Fetches @url with Mechanize and returns the page's links whose href
# contains this tree's host. Each link is passed through complete_url and
# duplicates are removed, keeping first-seen order.
#
# @return [Array<String>] unique URLs of the page's own-host links
def own_links
  page = Mechanize.new.get(@url)

  # XXX: the pattern still needs work (http://www.rubular.com helps testing).
  # The host is escaped so that its dots match literally; unescaped, a host
  # like "example.com" would also match "exampleXcom".
  own_link_regexp = %r!(#{Regexp.escape(@uri.host.to_s)})[a-zA-Z0-9.?\/=%&一-龠亜-煕-]+$!

  # Links held by the page that match the own-host pattern.
  page.links_with(href: own_link_regexp)
      .map { |link| complete_url(page.uri, link.uri) }
      .uniq
end
tree_all(url = @url, target_links = {}, deep_level = nil )
click to toggle source
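Recursively collects own links and returns them as a hash keyed by link URL; each value is the page's meta information or an error marker.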
# File lib/SitemapMaker/site_tree.rb, line 50
# Recursively gathers the own links reachable from +url+.
#
# For every link of the page that has no (non-blank) entry yet, the link's
# meta information is stored under the link URL and the link is crawled in
# turn with the depth reduced by one. Any error raised while doing so is
# printed and the link is recorded with an error marker instead.
#
# @param url [String] page to start from (defaults to this tree's URL)
# @param target_links [Hash] entries collected so far; it is copied, not mutated
# @param deep_level [Integer, String, nil] remaining depth; nil falls back to
#   SitemapMaker::Utils::DEFAULT_LEVEL, a String is converted with to_i
# @return [Hash] link URL => meta hash or error hash
def tree_all(url = @url, target_links = {}, deep_level = nil)
  deep_level ||= SitemapMaker::Utils::DEFAULT_LEVEL
  deep_level = deep_level.to_i if deep_level.is_a?(String)

  # Work on a copy so the caller's hash stays as it was.
  collected = target_links.dup

  # NOTE(review): the page is fetched even when deep_level is 0, in which
  # case nothing is recorded below — confirm whether that fetch is wanted.
  Tree.new(url).own_links.each do |link|
    link_key = link.to_s

    # Only links without a recorded entry are handled.
    next unless collected[link_key].blank?

    begin
      # Nothing is recorded once the depth budget is used up.
      if deep_level > 0
        # Store the meta information, then descend into the link.
        collected[link_key] = get_meta(link_key)
        collected.merge!(tree_all(link_key, collected, deep_level - 1))
      end
    rescue StandardError => e
      # TODO: state explicitly that the link is broken when the error is a 404.
      collected[link_key] = { error: "リンク切れ" }
      puts e
    ensure
      puts link_key
    end
  end

  collected
end
Private Instance Methods
complete_url(base_uri, path = nil)
click to toggle source
Completes a link path into a full URL using base_uri.
# File lib/SitemapMaker/site_tree.rb, line 94
# Turns a link target into a URL string.
#
# A target that already contains this tree's host is returned as is. A
# target starting with "/" is appended to the scheme and host of +base_uri+;
# anything else is appended to +base_uri.directory_path+.
#
# @param base_uri [#scheme, #host, #directory_path] URI of the page the link
#   was found on (directory_path is not a stdlib URI method — presumably a
#   project extension; confirm)
# @param path [#to_s, nil] link target
# @return [String] the completed URL
def complete_url(base_uri, path = nil)
  target = path.to_s

  # Literal substring test: String#match would compile the host into a
  # regexp in which every "." matches any character.
  return target if target.include?(@uri.host)

  if target.start_with?("/")
    # Absolute path: reuse the page's own scheme instead of forcing http, so
    # links found on https pages stay https.
    "#{base_uri.scheme || 'http'}://#{base_uri.host}#{target}"
  else
    # Relative path: resolve against the page's directory.
    "#{base_uri.directory_path}#{target}"
  end
end
get_meta(url)
click to toggle source
Returns the page's meta details (description, title and keywords).
# File lib/SitemapMaker/site_tree.rb, line 83
# Downloads +url+ and returns the meta details of the page.
#
# @param url [String] address of the page to inspect
# @return [Hash] the page's :description, :title and :keywords
def get_meta(url)
  # URI.open rather than Kernel#open: open-uri no longer makes Kernel#open
  # accept URLs as of Ruby 3.0.
  # NOTE(review): the handle is left open because it is not visible here
  # whether SitemapMaker::HTML reads it eagerly — confirm, then use the
  # block form of URI.open.
  page = SitemapMaker::HTML(URI.open(url))

  {
    description: page.description,
    title: page.title,
    keywords: page.keywords
  }
end