class Hypermicrodata::Document

Attributes

doc[R]
items[R]

Public Class Methods

new(content, options = {}) click to toggle source
# File lib/hypermicrodata/document.rb, line 6
def initialize(content, options = {})
  encoding = options[:force_encoding] || nil
  @doc = Nokogiri::HTML(content, nil, encoding)
  @page_url = options[:page_url]
  @filter_xpath_attr = options[:filter_xpath_attr]
  @items = extract_items
end

Public Instance Methods

extract_items() click to toggle source
# File lib/hypermicrodata/document.rb, line 14
def extract_items
  itemscopes.collect do |itemscope|
    Item.parse(itemscope, @page_url)
  end
end

Private Instance Methods

itemscopes() click to toggle source
# File lib/hypermicrodata/document.rb, line 22
def itemscopes
  items_xpath = 'self::*[@itemscope] | .//*[@itemscope and not(@itemprop)] | .//form[not(@itemprop)]'
  if @filter_xpath_attr
    filtered_doc = @doc.xpath("//*[#{@filter_xpath_attr}]")
    unless filtered_doc.empty?
      return filtered_doc.xpath(items_xpath)
    end
  end
  print "XPath //*[#{@filter_xpath_attr}] is not found. "
  filtered_doc = @doc.xpath('//main')
  unless filtered_doc.empty?
    print "main node is used.\n"
    return filtered_doc.xpath(items_xpath)
  end
  print "root node is used.\n"
  @doc.xpath(items_xpath)
end