class Anemone::PageStore
Public Class Methods
new(storage = {})
click to toggle source
# File lib/anemone/page_store.rb, line 9
#
# Builds a PageStore on top of +storage+.
#
# storage - the backing object that the other methods read and write via
#           @storage. A fresh empty Hash is used when it is omitted. The
#           object is kept by reference, not copied.
def initialize(storage = {})
  @storage = storage
end
Public Instance Methods
[](index)
click to toggle source
We typically index the hash with a URI, but convert it to a String for easier retrieval
# File lib/anemone/page_store.rb, line 15
#
# Fetches the entry stored under +index+.
#
# index - any object; it is converted with #to_s before the lookup, so a
#         URI and its String form address the same entry.
#
# Returns whatever @storage returns for that String key.
def [](index)
  lookup_key = index.to_s
  @storage[lookup_key]
end
[]=(index, other)
click to toggle source
# File lib/anemone/page_store.rb, line 19
#
# Stores +other+ under the String form of +index+.
#
# index - any object; converted with #to_s to build the storage key.
# other - the value to store.
#
# Returns the result of the assignment into @storage.
def []=(index, other)
  storage_key = index.to_s
  @storage[storage_key] = other
end
delete(key)
click to toggle source
# File lib/anemone/page_store.rb, line 23
#
# Removes the entry stored under the String form of +key+.
#
# key - any object; converted with #to_s before being passed to
#       @storage.delete.
#
# Returns whatever @storage.delete returns.
def delete(key)
  storage_key = key.to_s
  @storage.delete(storage_key)
end
each_value() { |value| ... }
click to toggle source
# File lib/anemone/page_store.rb, line 31
#
# Iterates over the store with #each and yields only the value of every
# key/value pair to the given block.
#
# Returns whatever #each returns.
def each_value
  each do |_key, value|
    yield value
  end
end
has_key?(key)
click to toggle source
# File lib/anemone/page_store.rb, line 27
#
# Asks the backing storage whether it holds an entry for +key+.
#
# key - any object; converted with #to_s before the check.
#
# Returns the result of @storage.has_key?.
def has_key?(key)
  storage_key = key.to_s
  @storage.has_key?(storage_key)
end
has_page?(url)
click to toggle source
Does this PageStore
contain the specified URL? HTTP
and HTTPS versions of a URL are considered to be the same page.
# File lib/anemone/page_store.rb, line 51
#
# Reports whether the store has an entry for +url+, treating the http and
# https variants of a URL as the same page.
#
# url - an object responding to #scheme and #dup whose copy accepts
#       #scheme= (the code calls all three), e.g. a URI.
#
# Returns true/false from #has_key?.
def has_page?(url)
  schemes = %w(http https)

  # Any other scheme is looked up exactly as given.
  return has_key?(url) unless schemes.include?(url.scheme)

  # Work on a copy so the caller's URL keeps its original scheme.
  candidate = url.dup
  schemes.any? do |scheme|
    candidate.scheme = scheme
    has_key?(candidate)
  end
end
pages_linking_to(urls)
click to toggle source
If given a single URL (as a String or URI), returns an Array of Pages which link to that URL. If given an Array of URLs, returns a Hash (URI => [Page, Page, …]) of Pages linking to those URLs.
# File lib/anemone/page_store.rb, line 111
#
# Finds the stored pages whose #links include the given URL(s).
#
# urls - a single URL (String or URI) or an Array of them. Entries that are
#        not already URI objects are converted with URI(); entries that
#        cannot be converted are dropped.
#
# Returns an Array of pages when a single URL was given ([] when that URL
# could not be converted), or a Hash (URI => [page, ...]) when an Array was
# given.
#
# NOTE: an Array argument is normalized in place (entries become URIs and
# unconvertible entries are removed). This side effect is kept on purpose:
# callers may read the normalized URIs back from the array they passed in.
def pages_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  urls.map! do |url|
    next url if url.is_a?(URI)

    begin
      URI(url)
    rescue StandardError
      # Best effort: anything URI() rejects is treated as "no such URL".
      nil
    end
  end
  # Drop the unconvertible entries so no nil key ends up in the result.
  urls.compact!

  links = {}
  urls.each { |url| links[url] = [] }
  values.each do |page|
    urls.each { |url| links[url] << page if page.links.include?(url) }
  end

  # For a single URL that could not be converted there is no entry at all;
  # answer with an empty Array so the return type stays the same.
  single ? links.fetch(urls.first, []) : links
end
shortest_paths!(root)
click to toggle source
Use a breadth-first search to calculate the single-source shortest paths from root to all pages in the PageStore
# File lib/anemone/page_store.rb, line 65
#
# Breadth-first walk from +root+ that records, on every page it reaches,
# the depth at which the page was first seen and marks it visited.
#
# root - the starting key; a String is converted with URI() first.
#
# Raises RuntimeError ("Root node not found") when #has_key?(root) is false.
# Returns self.
def shortest_paths!(root)
  root = URI(root) if root.is_a?(String)
  raise "Root node not found" unless has_key?(root)

  frontier = Queue.new
  frontier.enq root

  start_page = self[root]
  start_page.depth = 0
  start_page.visited = true
  self[root] = start_page

  until frontier.empty?
    page = self[frontier.deq]

    page.links.each do |u|
      target = u
      link = self[target]

      # Stop at unknown, unfetched or already visited pages; otherwise mark
      # the page and, if it is a redirect, continue with its destination.
      until link.nil? || !link.fetched? || link.visited
        # Redirect pages are not expanded themselves, so they are not queued.
        frontier << target unless link.redirect?
        link.visited = true
        link.depth = page.depth + 1
        # Write the modified page back into the store.
        self[target] = link

        break unless link.redirect?

        target = link.redirect_to
        link = self[target]
      end
    end
  end

  self
end
touch_key(key)
click to toggle source
# File lib/anemone/page_store.rb, line 41
#
# Stores a new Page built from +key+ under +key+, replacing whatever was
# stored there before.
#
# Returns the result of the #[]= assignment.
def touch_key(key)
  placeholder = Page.new(key)
  self[key] = placeholder
end
touch_keys(keys)
click to toggle source
# File lib/anemone/page_store.rb, line 45
#
# Stores a new Page for every element of +keys+ in one #merge! call on the
# backing storage. Each entry is keyed by the element's #to_s form.
#
# Returns whatever @storage.merge! returns.
def touch_keys(keys)
  placeholders = {}
  keys.each { |k| placeholders[k.to_s] = Page.new(k) }
  @storage.merge!(placeholders)
end
uniq!()
click to toggle source
Removes all Pages from storage where redirect? is true
# File lib/anemone/page_store.rb, line 102
#
# Deletes every stored page whose #redirect? is true, using the page's own
# #url as the key to delete.
#
# Returns self.
def uniq!
  each_value do |page|
    next unless page.redirect?

    delete(page.url)
  end
  self
end
urls_linking_to(urls)
click to toggle source
If given a single URL (as a String or URI), returns an Array of URLs which link to that URL. If given an Array of URLs, returns a Hash (URI => [URI, URI, …]) of URLs linking to those URLs.
# File lib/anemone/page_store.rb, line 143
#
# Like #pages_linking_to, but reports the #url of each linking page instead
# of the page itself.
#
# urls - a single URL (String or URI) or an Array of them; passed on to
#        #pages_linking_to as an Array.
#
# Returns an Array of page URLs when a single URL was given, or a Hash
# (key from #pages_linking_to => [url, ...]) when an Array was given.
def urls_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  # Build a fresh Hash instead of reassigning entries of the Hash that is
  # being iterated.
  url_links = pages_linking_to(urls).each_with_object({}) do |(url, pages), result|
    result[url] = pages.map(&:url)
  end

  # A single URL yields at most one entry. Take it directly rather than
  # looking it up by the original argument, which may be a String while the
  # Hash is keyed by URI.
  single ? (url_links.values.first || []) : url_links
end
values()
click to toggle source
# File lib/anemone/page_store.rb, line 35
#
# Collects the value of every key/value pair yielded by #each.
#
# Returns a new Array of those values, in iteration order.
def values
  [].tap do |collected|
    each { |_key, value| collected << value }
  end
end