module Plumnailer::Doc

Mixin for Nokogiri::HTML::Document.

Attributes

source_url[RW]

Public Instance Methods

doc_base_href() click to toggle source

Get the href attribute of the base tag from the head of the document.

# File lib/plumnailer/doc.rb, line 9
# Look up the <base> element under <head> and return its href attribute.
#
# Returns nil when the lookup finds no element; otherwise returns whatever
# the element yields for 'href'.
def doc_base_href
  base_tag = at('//head/base')
  return unless base_tag

  base_tag['href']
end
img_abs_urls(base_url=nil) click to toggle source

Return a list of the absolute urls of all imgs in the document.

# File lib/plumnailer/doc.rb, line 20
# Return a list of the absolute urls of all imgs in the document.
#
# base_url:: optional base used to resolve non-HTTP(S) srcs. When nil, the
#            value of doc_base_href is used, and failing that source_url.
#
# Returns an Array of URI objects, in the order produced by img_srcs.
# Any src that URI() rejects with URI::InvalidURIError is skipped.
def img_abs_urls(base_url=nil)
  # The fallback base is the same for every img, so resolve it at most once
  # (and only if some src actually needs it) instead of once per img.
  resolved_base = nil

  img_srcs.each_with_object([]) do |src, urls|
    begin
      uri = URI(src)
    rescue URI::InvalidURIError
      next
    end

    # URI::HTTPS is a subclass of URI::HTTP, so both pass through untouched;
    # anything else is joined onto the base.
    unless uri.is_a?(URI::HTTP)
      resolved_base ||= base_url || doc_base_href || source_url
      uri = URI.join(resolved_base, src)
    end

    urls << uri
  end
end
img_srcs() click to toggle source

Return a list of the src attributes of all img tags.

# File lib/plumnailer/doc.rb, line 15
# Collect the 'src' value of every node matched by the '//img' search.
#
# Nodes whose 'src' is nil are left out; all other values are kept in
# search order. Returns an Array.
def img_srcs
  srcs = []
  search('//img').each do |img|
    src = img['src']
    srcs << src unless src.nil?
  end
  srcs
end