class OpenGraphReader::Parser

Parses the OpenGraph tags of an HTML document into a graph.

@api private

Attributes

additional_namespaces[R]

Namespaces found in the passed document's head tag.

@return [Array<String>]

Public Class Methods

new(html) click to toggle source

Create a new parser.

@param [#to_s, Nokogiri::XML::Node] html the document to parse.

# File lib/open_graph_reader/parser.rb, line 28
# Set up a parser for the given document.
#
# @param [#to_s, Nokogiri::XML::Node] html the document to parse; it is
#   converted with +to_doc+ and the result is stored in @doc.
def initialize html
  # No extra namespaces are known until the head tag has been inspected.
  @additional_namespaces = []
  @doc = to_doc(html)
end

Public Instance Methods

any_tags?() click to toggle source

Whether there are any OpenGraph tags at all.

@return [Bool]

# File lib/open_graph_reader/parser.rb, line 36
# Whether the document contains any OpenGraph tags at all.
#
# @return [Bool] whatever the graph answers when asked if "og" exists.
def any_tags?
  root_namespace = "og"
  graph.exist?(root_namespace)
end
graph() click to toggle source

Build and return the {Graph}.

@return [Graph]

# File lib/open_graph_reader/parser.rb, line 43
# Return the {Graph} for the document, building it on first use.
#
# The result of +build_graph+ is cached in @graph, so later calls return the
# same object.
#
# @return [Graph]
def graph
  return @graph if @graph

  @graph = build_graph
end
title() click to toggle source

The value of the title tag of the passed document.

@return [String, nil] the title text, or nil if the document has no title tag

# File lib/open_graph_reader/parser.rb, line 50
# The text of the document's <title> element.
#
# @return [String, nil] the text of the first /html/head/title node, or nil
#   when the XPath query yields no node.
def title
  title_node = @doc.xpath("/html/head/title").first
  title_node&.text
end

Private Instance Methods

build_graph() click to toggle source
# File lib/open_graph_reader/parser.rb, line 56
# Build a new Graph from the document's matching meta tags.
#
# Each tag's "property" is lowercased and split on ":"; the last segment names
# the new node and the preceding segments form the path it is appended to.
#
# @return [Graph] the populated graph.
def build_graph
  Graph.new.tap do |result|
    meta_tags.each do |meta|
      segments = meta["property"].downcase.split(":")
      leaf_name = segments.pop # what remains in segments is the parent path
      parent = result.find_or_create_path(segments)

      # @todo make stripping configurable?
      parent << Graph::Node.new(leaf_name, meta["content"].strip)
    end
  end
end
meta_tags() click to toggle source
# File lib/open_graph_reader/parser.rb, line 70
# Select the meta tags under /html/head that satisfy +xpath_condition+.
#
# XPathHelpers is passed as the second argument to the XPath query;
# presumably it supplies the custom functions used in the condition
# (TODO confirm against XPathHelpers).
#
# @return the result of the XPath query on the head element.
# @raise [NoOpenGraphDataError] if the document has no /html/head element.
def meta_tags
  head = @doc.xpath("/html/head").first

  if head
    head.xpath("meta[#{xpath_condition(head)}]", XPathHelpers)
  else
    raise NoOpenGraphDataError, "There's no head tag in #{@doc}"
  end
end
to_doc(html) click to toggle source
# File lib/open_graph_reader/parser.rb, line 93
# Turn the input into a document that can be queried.
#
# @param [#to_s, Nokogiri::XML::Node] html an existing node, which is
#   returned untouched, or anything else, which is stringified and parsed
#   as HTML.
# @return [Nokogiri::XML::Node] the given node, or the result of
#   Nokogiri::HTML.parse.
def to_doc html
  return html if Nokogiri::XML::Node === html

  Nokogiri::HTML.parse(html.to_s)
end
xpath_condition(head) click to toggle source
# File lib/open_graph_reader/parser.rb, line 78
# Build the XPath predicate used to select OpenGraph meta tags.
#
# Properties starting with "og:" always match. If the head element has a
# "prefix" attribute, every "name: uri" pair found in it is recorded
# (lowercased) in @additional_namespaces, and every name other than "og" adds
# another alternative to the predicate.
#
# @param [#[]] head the head element; only its "prefix" attribute is read.
# @return [String] the predicate, which also requires a content attribute.
def xpath_condition head
  # Alternatives are collected in an array and joined at the end, so no string
  # literal is mutated; this keeps the method working under
  # frozen_string_literal.
  alternatives = ["ci_starts_with(@property, 'og:')"]

  prefix_attribute = head["prefix"]
  if prefix_attribute
    # The second capture (the namespace URI) must be present for a pair to
    # match, but only the prefix name is kept.
    @additional_namespaces =
      prefix_attribute.scan(/(\w+):\s*([^ ]+)/).map {|prefix, _| prefix.downcase }

    @additional_namespaces.each do |additional_namespace|
      next if additional_namespace == "og"

      # Match on "name:" just like the built-in "og:" case, so a declared
      # prefix "music" does not also select properties such as "musical:x".
      alternatives << "ci_starts_with(@property, '#{additional_namespace}:')"
    end
  end

  "(#{alternatives.join(' or ')}) and @content"
end