class SLA::Page
Attributes
depth[R]
parent[R]
uri[R]
Public Class Methods
new(uri, parent: nil, depth: 0)
click to toggle source
# File lib/sla/page.rb, line 5
# Builds a page for +uri+.
#
# uri::    a String or an already-parsed URI object. A String that does not
#          begin with an explicit <tt>http://</tt> or <tt>https://</tt>
#          scheme gets <tt>http://</tt> prepended, is parsed with URI.parse,
#          and has its fragment removed. Any other object is stored as given.
# parent:: the page this one was reached from, or +nil+.
# depth::  the depth value stored for this page (defaults to 0).
def initialize(uri, parent: nil, depth: 0)
  if uri.is_a? String
    # Look for a real scheme prefix rather than any leading "http", so that
    # hosts such as "httpbin.org" still receive the default scheme.
    uri = "http://#{uri}" unless uri =~ %r{\Ahttps?://}i
    uri = URI.parse uri
    uri.fragment = nil
  end

  @uri = uri
  @parent = parent
  @depth = depth
end
Public Instance Methods
code()
click to toggle source
# File lib/sla/page.rb, line 19
# Returns the code reported by the response, or the string 'ERR' when the
# response has no code.
def code
  status = response.code
  return 'ERR' unless status

  status
end
error()
click to toggle source
# File lib/sla/page.rb, line 15
# Returns whatever the response reports as its error.
def error
  fetched = response
  fetched.error
end
external?()
click to toggle source
# File lib/sla/page.rb, line 23
# True when this page has a parent and its host differs from the parent's
# host. A page without a parent is never external.
def external?
  return false unless parent

  uri.host != parent.uri.host
end
inspect()
click to toggle source
# File lib/sla/page.rb, line 27
# Short description of the page showing its url and depth.
def inspect
  address = url
  level = depth
  "#<Page url: #{address}, depth: #{level}>"
end
pages()
click to toggle source
# File lib/sla/page.rb, line 31
# Returns the result of pages!, computed on first use and kept in @pages.
def pages
  @pages = pages! unless @pages
  @pages
end
url()
click to toggle source
# File lib/sla/page.rb, line 35
# Returns the page's uri converted with to_s.
def url
  location = uri
  location.to_s
end
valid?()
click to toggle source
# File lib/sla/page.rb, line 39
# True when the response reports no error.
def valid?
  response.error ? false : true
end
Private Instance Methods
anchors()
click to toggle source
# File lib/sla/page.rb, line 45
# Returns the nodes matching the CSS selector 'a[href]' in the page's dom,
# looked up on first use and kept in @anchors.
def anchors
  @anchors = dom.css('a[href]') unless @anchors
  @anchors
end
content()
click to toggle source
# File lib/sla/page.rb, line 49
# Returns the response's content, kept in @content once it is truthy.
def content
  @content = response.content unless @content
  @content
end
dom()
click to toggle source
# File lib/sla/page.rb, line 53
# Returns the page content parsed with Nokogiri::HTML, built on first use
# and kept in @dom.
def dom
  @dom = Nokogiri::HTML(content) unless @dom
  @dom
end
normalize_url(new_url)
click to toggle source
# File lib/sla/page.rb, line 57
# Turns a link target into an absolute http(s) URL string.
#
# new_url:: the raw link text (for example an anchor's href).
#
# The fragment is dropped, and a relative link is resolved against this
# page's own url. Returns the resulting URL as a String, or +nil+ when the
# link cannot be parsed or joined, or when its scheme is not http or https.
# Returning +nil+ for unparsable links means one bad link does not raise out
# of the caller.
def normalize_url(new_url)
  link = URI.parse new_url
  link.fragment = nil

  result = link.absolute? ? link : URI.join(url, link)

  %w[http https].include?(result.scheme.to_s.downcase) ? result.to_s : nil
rescue URI::Error
  # Covers URI::InvalidURIError from parse and URI::BadURIError from join.
  nil
end
pages!()
click to toggle source
# File lib/sla/page.rb, line 66
# Builds a Page for each anchor whose href normalizes to a URL, one level
# deeper than this page and with this page as parent. Anchors whose href
# normalizes to nil are skipped. Pages are keyed by normalized URL, so a URL
# that appears more than once yields a single entry.
def pages!
  by_url = anchors.each_with_object({}) do |anchor, found|
    link = normalize_url anchor['href']
    next unless link

    found[link] = Page.new link, parent: self, depth: depth + 1
  end

  by_url.values
end
response()
click to toggle source
# File lib/sla/page.rb, line 77
# Returns the result of response!, obtained on first use and kept in
# @response.
def response
  @response = response! unless @response
  @response
end
response!()
click to toggle source
# File lib/sla/page.rb, line 81
# Fetches this page's url through WebCache.get, replaces @uri with the
# response's base_uri, and returns the response.
# NOTE(review): base_uri presumably reflects the final location after any
# redirects - confirm against WebCache.
def response!
  WebCache.get(url).tap { |fetched| @uri = fetched.base_uri }
end