class Hypermicrodata::ItempropParser

Class that parses itemprop elements

Constants

NON_TEXTCONTENT_ELEMENTS
URL_ATTRIBUTES

Attributes

property[R]

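The parsed property. This is a Link object when the element is a link and a Property object otherwise; either one is built from the extracted value, the property names and the rel names.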

Public Class Methods

new(element, page_url=nil) click to toggle source

Create a new Itemprop object

element

The itemprop element to be parsed

page_url

The url of the page, including filename, used to form absolute urls

# File lib/hypermicrodata/itemprop_parser.rb, line 28
# Builds the parser for a single itemprop element and parses it right away.
#
# element::  the itemprop element to be parsed
# page_url:: the url of the page, used to form absolute urls
#
# The result is stored in @property: a Link when link? is true, otherwise a
# Property. Both are given the extracted value, the property names and the
# rel names, in that order.
def initialize(element, page_url=nil)
  @element = element
  @page_url = page_url
  property_class = link? ? Link : Property
  @property = property_class.new(extract_property, extract_property_names, extract_rel_names)
end
parse(element, page_url=nil) click to toggle source

Parse the element and return the resulting property object (a Link when the element is a link, otherwise a Property).

element

The itemprop element to be parsed

page_url

The url of the page, including filename, used to form absolute urls

# File lib/hypermicrodata/itemprop_parser.rb, line 42
# Convenience entry point: builds a parser for +element+ and returns the
# property it produced.
#
# element::  the itemprop element to be parsed
# page_url:: the url of the page, used to form absolute urls
def self.parse(element, page_url=nil)
  parser = new(element, page_url)
  parser.property
end

Public Instance Methods

Private Instance Methods

extract_attribute() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 111
# Reads the property value of an element whose value lives in an attribute
# rather than in its text content.
#
# A 'content' attribute wins when present; otherwise the attribute named by
# NON_TEXTCONTENT_ELEMENTS for this element's tag is used.
#
# Returns nil when neither attribute exists. Returns the attribute value run
# through make_absolute_url when url_attribute? accepts the attribute's name,
# and the raw attribute value otherwise.
def extract_attribute
  attribute_name = NON_TEXTCONTENT_ELEMENTS[@element.name]
  attribute = @element.attribute('content') || @element.attribute(attribute_name)
  return nil if attribute.nil?

  value = attribute.value
  # Hand url_attribute? the attribute's *name*, not the attribute object: the
  # object itself never equals a name, so URL values were never made absolute.
  # NOTE(review): assumes URL_ATTRIBUTES lists attribute-name strings, like the
  # values of NON_TEXTCONTENT_ELEMENTS - confirm against the constant.
  url_attribute?(attribute.name) ? make_absolute_url(value) : value
end
extract_properties() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 51
# Builds a Hash keyed by every property name on the element.
#
# extract_property is called once per name, so each key gets its own
# freshly extracted value.
def extract_properties
  properties = {}
  extract_property_names.each do |prop_name|
    properties[prop_name] = extract_property
  end
  properties
end
extract_property() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 103
# Extracts the value of the property.
#
# An element carrying an 'itemscope' attribute is a nested item and is
# delegated to Item.parse; anything else goes through
# extract_property_value.
def extract_property
  return Item.parse(@element, @page_url) if @element.attribute('itemscope')

  extract_property_value
end
extract_property_names() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 84
# Lists the property names declared in the element's 'itemprop' attribute.
#
# The attribute value is split on whitespace. Returns an empty Array when
# the element has no 'itemprop' attribute.
def extract_property_names
  names_attr = @element.attribute('itemprop')
  return [] unless names_attr

  names_attr.value.split
end
extract_property_value() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 94
# Extracts the value of a property that is not a nested item.
#
# Elements recognised by non_textcontent_element? take their value from an
# attribute (see extract_attribute). For every other element the stripped
# 'content' attribute is used when present, falling back to the stripped
# inner text.
def extract_property_value
  return extract_attribute if non_textcontent_element?(@element.name)

  content_attr = @element.attribute('content')
  raw_text = content_attr ? content_attr.value : @element.inner_text
  raw_text.strip
end
extract_rel_names() click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 89
# Lists the link relations declared in the element's 'rel' attribute.
#
# Returns the whitespace-separated rel values only when link? is true and
# the attribute exists; returns an empty Array in every other case.
def extract_rel_names
  rel_attr = @element.attribute('rel')
  return [] unless link? && rel_attr

  rel_attr.value.split
end
make_absolute_url(url) click to toggle source

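Makes a relative url absolute using the page url. If a valid absolute url cannot be formed, the url is returned unchanged; note that this differs from the Microdata spec, which calls for an empty string in that case.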

# File lib/hypermicrodata/itemprop_parser.rb, line 67
# Resolves +url+ against the page url when +url+ is relative.
#
# url:: the attribute value to resolve
#
# Returns +url+ unchanged when it is already absolute, when it is nil, when
# no page url is available, or when parsing/joining fails. This is a
# deliberate best-effort fallback: the raw value is kept rather than raising.
def make_absolute_url(url)
  target = Addressable::URI.parse(url)
  return url unless target && target.relative?

  base = Addressable::URI.parse(@page_url)
  return url if base.nil?

  # Addressable::URI#merge only accepts a Hash of URI components and raises
  # TypeError for a String, which the rescue below used to swallow, so no
  # url was ever resolved. #join is the call that resolves a relative
  # reference against a base.
  base.join(url).to_s
rescue StandardError
  url
end
non_textcontent_element?(element) click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 76
# Tells whether +element+ (an element name) has an entry in
# NON_TEXTCONTENT_ELEMENTS, i.e. whether its property value is read from an
# attribute instead of its text content.
def non_textcontent_element?(element)
  NON_TEXTCONTENT_ELEMENTS.key?(element)
end
url_attribute?(attribute) click to toggle source
# File lib/hypermicrodata/itemprop_parser.rb, line 80
# Tells whether +attribute+ is one of the entries of URL_ATTRIBUTES.
def url_attribute?(attribute)
  URL_ATTRIBUTES.member?(attribute)
end