class Oga::XML::Parser

DOM parser for both XML and HTML.

This parser does not produce a dedicated AST; instead it emits XML nodes directly. Basic usage of this parser is as follows:

parser   = Oga::XML::Parser.new('<foo></foo>')
document = parser.parse

To enable HTML parsing you'd use the following instead:

parser   = Oga::XML::Parser.new('<foo></foo>', :html => true)
document = parser.parse

In both cases you can use either a String or an IO as the parser input. IO instances will result in lower memory overhead, especially when parsing large files.

Constants

CONFIG
TOKEN_ERROR_MAPPING

Hash mapping token types to dedicated error labels.

@return [Hash]

Public Class Methods

new(data, options = {}) click to toggle source

@param [String|IO] data The input to parse. @param [Hash] options @see [Oga::XML::Lexer#initialize]

# File lib/oga/xml/parser.rb, line 212
# Prepares the parser state for the given input.
#
# @param [String|IO] data The input to parse.
# @param [Hash] options Forwarded unchanged to `Lexer.new`.
def initialize(data, options = {})
  # Line tracking starts at the first line.
  @line  = 1
  @data  = data
  @lexer = Lexer.new(data, options)

  # NOTE(review): presumably clears native lexer state before the first
  # parse; confirm against Lexer#reset_native.
  @lexer.reset_native
end

Public Instance Methods

_rule_0(val) click to toggle source
# File lib/oga/xml/parser.rb, line 362
# Grammar action 0: passes the first matched value to #on_document and
# returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_0(val)
  on_document(val.first)
end
_rule_1(val) click to toggle source
# File lib/oga/xml/parser.rb, line 366
# Grammar action 1: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_1(val)
  val.first
end
_rule_10(val) click to toggle source
# File lib/oga/xml/parser.rb, line 410
# Grammar action 10: folds the elements of the first matched value together
# using `+`. An empty collection yields nil.
#
# @param [Array] val Values matched for this rule; the first entry is the
#  collection to combine.
def _rule_10(val)
  val.first.reduce(:+)
end
_rule_11(val) click to toggle source
# File lib/oga/xml/parser.rb, line 414
# Grammar action 11: passes the second matched value to #on_cdata and
# returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_11(val)
  _, text = val

  on_cdata(text)
end
_rule_12(val) click to toggle source
# File lib/oga/xml/parser.rb, line 418
# Grammar action 12: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
def _rule_12(val)
  head, tail = val

  head + tail
end
_rule_13(val) click to toggle source
# File lib/oga/xml/parser.rb, line 422
# Grammar action 13: ignores the matched values and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_13(val)
   '' 
end
_rule_14(val) click to toggle source
# File lib/oga/xml/parser.rb, line 426
# Grammar action 14: passes the second matched value to #on_comment and
# returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_14(val)
  _, text = val

  on_comment(text)
end
_rule_15(val) click to toggle source
# File lib/oga/xml/parser.rb, line 430
# Grammar action 15: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
def _rule_15(val)
  head, tail = val

  head + tail
end
_rule_16(val) click to toggle source
# File lib/oga/xml/parser.rb, line 434
# Grammar action 16: ignores the matched values and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_16(val)
   '' 
end
_rule_17(val) click to toggle source
# File lib/oga/xml/parser.rb, line 438
# Grammar action 17: passes the second and third matched values to
# #on_proc_ins (as name and text) and returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_17(val)
  _, name, text = val

  on_proc_ins(name, text)
end
_rule_18(val) click to toggle source
# File lib/oga/xml/parser.rb, line 444
# Grammar action 18: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
def _rule_18(val)
  head, tail = val

  head + tail
end
_rule_19(val) click to toggle source
# File lib/oga/xml/parser.rb, line 448
# Grammar action 19: ignores the matched values and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_19(val)
   '' 
end
_rule_2(val) click to toggle source
# File lib/oga/xml/parser.rb, line 370
# Grammar action 2: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_2(val)
  val.first
end
_rule_20(val) click to toggle source
# File lib/oga/xml/parser.rb, line 452
# Grammar action 20: builds a two-element Array whose first slot is nil and
# whose second slot is the first matched value.
#
# NOTE(review): _rule_22 reads such a pair as [namespace, name], so this is
# presumably the "name without namespace" case; confirm against the grammar.
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_20(val)
  [nil, val.first]
end
_rule_21(val) click to toggle source
# File lib/oga/xml/parser.rb, line 456
# Grammar action 21: returns the complete list of matched values as-is.
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_21(val)
   val 
end
_rule_22(val) click to toggle source
# File lib/oga/xml/parser.rb, line 460
# Grammar action 22: unpacks the first matched value as a
# [namespace, name] pair and passes it, together with the second matched
# value (the attributes), to #on_element.
#
# @param [Array] val Values matched for this rule.
def _rule_22(val)
  name_pair, attributes = val

  on_element(name_pair[0], name_pair[1], attributes)
end
_rule_23(val) click to toggle source
# File lib/oga/xml/parser.rb, line 466
# Grammar action 23: attaches the matched children to the matched element
# (only when an element is present) and then returns the result of
# #after_element for that element.
#
# @param [Array] val Values matched for this rule: element, then children.
def _rule_23(val)
  element, children = val

  on_element_children(element, children) if element

  after_element(element)
end
_rule_24(val) click to toggle source
# File lib/oga/xml/parser.rb, line 476
# Grammar action 24: passes the first matched value to #on_attributes and
# returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_24(val)
  on_attributes(val.first)
end
_rule_25(val) click to toggle source
# File lib/oga/xml/parser.rb, line 480
# Grammar action 25: builds an attribute from three matched values given in
# the order namespace name, name, value.
#
# @param [Array] val Values matched for this rule.
def _rule_25(val)
  ns_name, name, value = val

  on_attribute(name, ns_name, value)
end
_rule_26(val) click to toggle source
# File lib/oga/xml/parser.rb, line 484
# Grammar action 26: builds an attribute without a namespace from two
# matched values given in the order name, value.
#
# @param [Array] val Values matched for this rule.
def _rule_26(val)
  name, value = val

  on_attribute(name, nil, value)
end
_rule_27(val) click to toggle source
# File lib/oga/xml/parser.rb, line 488
# Grammar action 27: passes the second matched value to #on_xml_decl and
# returns that callback's result.
#
# @param [Array] val Values matched for this rule.
def _rule_27(val)
  _, attributes = val

  on_xml_decl(attributes)
end
_rule_28(val) click to toggle source
# File lib/oga/xml/parser.rb, line 492
# Grammar action 28: builds a text node from the first matched value,
# appending the second matched value when one is present.
#
# @param [Array] val Values matched for this rule.
def _rule_28(val)
  leading, trailing = val

  text =
    if trailing
      leading + trailing
    else
      leading
    end

  on_text(text)
end
_rule_29(val) click to toggle source
# File lib/oga/xml/parser.rb, line 500
# Grammar action 29: returns the first matched value, with the second
# matched value appended via `+` when one is present.
#
# @param [Array] val Values matched for this rule.
def _rule_29(val)
  leading, trailing = val

  if trailing
    leading + trailing
  else
    leading
  end
end
_rule_3(val) click to toggle source
# File lib/oga/xml/parser.rb, line 374
# Grammar action 3: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_3(val)
  val.first
end
_rule_30(val) click to toggle source
# File lib/oga/xml/parser.rb, line 504
# Grammar action 30: ignores the matched values and returns nil.
#
# @param [Array] val Values matched for this rule (unused).
# @return [NilClass]
def _rule_30(val)
   nil 
end
_rule_31(val) click to toggle source
# File lib/oga/xml/parser.rb, line 508
# Grammar action 31: returns the second matched value, discarding the rest.
#
# @param [Array] val Values matched for this rule.
def _rule_31(val)
  _, inner = val

  inner
end
_rule_32(val) click to toggle source
# File lib/oga/xml/parser.rb, line 512
# Grammar action 32: returns the second matched value, discarding the rest.
#
# @param [Array] val Values matched for this rule.
def _rule_32(val)
  _, inner = val

  inner
end
_rule_33(val) click to toggle source
# File lib/oga/xml/parser.rb, line 516
# Grammar action 33: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
def _rule_33(val)
  head, tail = val

  head + tail
end
_rule_34(val) click to toggle source
# File lib/oga/xml/parser.rb, line 520
# Grammar action 34: ignores the matched values and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_34(val)
   '' 
end
_rule_35(val) click to toggle source
# File lib/oga/xml/parser.rb, line 524
# Grammar action 35: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_35(val)
  val.first
end
_rule_36(val) click to toggle source
# File lib/oga/xml/parser.rb, line 528
# Grammar action 36: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_36(val)
  val.first
end
_rule_37(val) click to toggle source
# File lib/oga/xml/parser.rb, line 532
# Grammar action 37: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_37(val)
  val.first
end
_rule_38(val) click to toggle source
# File lib/oga/xml/parser.rb, line 536
# Grammar action 38: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_38(val)
  val.first
end
_rule_39(val) click to toggle source
# File lib/oga/xml/parser.rb, line 540
# Grammar action 39: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_39(val)
  val.first
end
_rule_4(val) click to toggle source
# File lib/oga/xml/parser.rb, line 378
# Grammar action 4: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_4(val)
  val.first
end
_rule_40(val) click to toggle source
# File lib/oga/xml/parser.rb, line 544
# Grammar action 40: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_40(val)
  val.first
end
_rule_41(val) click to toggle source
# File lib/oga/xml/parser.rb, line 548
# Grammar action 41: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_41(val)
  val.first
end
_rule_42(val) click to toggle source
# File lib/oga/xml/parser.rb, line 552
# Grammar action 42: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_42(val)
  val.first
end
_rule_5(val) click to toggle source
# File lib/oga/xml/parser.rb, line 382
# Grammar action 5: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_5(val)
  val.first
end
_rule_6(val) click to toggle source
# File lib/oga/xml/parser.rb, line 386
# Grammar action 6: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_6(val)
  val.first
end
_rule_7(val) click to toggle source
# File lib/oga/xml/parser.rb, line 390
# Grammar action 7: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_7(val)
  val.first
end
_rule_8(val) click to toggle source
# File lib/oga/xml/parser.rb, line 394
# Grammar action 8: returns the first matched value unchanged.
#
# @param [Array] val Values matched for this rule.
def _rule_8(val)
  val.first
end
_rule_9(val) click to toggle source
# File lib/oga/xml/parser.rb, line 398
# Grammar action 9: builds a doctype from the matched values. The first
# value is skipped; the following five are passed to #on_doctype as the
# name, type, public ID, system ID and inline rules respectively.
#
# @param [Array] val Values matched for this rule.
def _rule_9(val)
  _, name, type, public_id, system_id, inline_rules = val

  on_doctype(
    name:         name,
    type:         type,
    public_id:    public_id,
    system_id:    system_id,
    inline_rules: inline_rules
  )
end
after_element(element) click to toggle source

@param [Oga::XML::Element] element @return [Oga::XML::Element]

# File lib/oga/xml/parser.rb, line 341
# Callback invoked by _rule_23 once an element has been processed. This
# implementation performs no work and returns the element it was given.
#
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element]
def after_element(element)
  element
end
each_token() { |type, value| ... } click to toggle source

Yields the next token from the lexer.

@yieldparam [Array]

# File lib/oga/xml/parser.rb, line 222
# Yields every token produced by the lexer as a `[type, value]` pair,
# followed by a final `[-1, -1]` pair once the lexer is done.
#
# The current line number is recorded in `@line` whenever the lexer
# supplies one.
#
# @yieldparam [Array]
def each_token
  @lexer.advance do |token_type, token_value, line_number|
    if line_number
      @line = line_number
    end

    yield [token_type, token_value]
  end

  # Trailing pair emitted after the lexer has finished.
  yield [-1, -1]
end
on_attribute(name, ns_name = nil, value = nil) click to toggle source

@param [String] name @param [String] ns_name @param [String] value @return [Oga::XML::Attribute]

# File lib/oga/xml/parser.rb, line 349
# Builds an attribute node from a name, optional namespace name and
# optional value.
#
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
def on_attribute(name, ns_name = nil, value = nil)
  Attribute.new(namespace_name: ns_name, name: name, value: value)
end
on_attributes(attrs) click to toggle source

@param [Array] attrs

# File lib/oga/xml/parser.rb, line 358
# Callback invoked by _rule_24 with the matched attributes. This
# implementation returns the given value unchanged.
#
# @param [Array] attrs
# @return [Array]
def on_attributes(attrs)
  attrs
end
on_cdata(text = nil) click to toggle source

@param [String] text @return [Oga::XML::Cdata]

# File lib/oga/xml/parser.rb, line 281
# Builds a CDATA node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Cdata]
def on_cdata(text = nil)
  Cdata.new(text: text)
end
on_comment(text = nil) click to toggle source

@param [String] text @return [Oga::XML::Comment]

# File lib/oga/xml/parser.rb, line 287
# Builds a comment node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Comment]
def on_comment(text = nil)
  Comment.new(text: text)
end
on_doctype(options = {}) click to toggle source

@param [Hash] options

# File lib/oga/xml/parser.rb, line 275
# Builds a doctype node, passing the options Hash straight to
# `Doctype.new`. _rule_9 calls this with the keys :name, :type, :public_id,
# :system_id and :inline_rules.
#
# @param [Hash] options
def on_doctype(options = {})
  Doctype.new(options)
end
on_document(children = []) click to toggle source

@param [Array] children @return [Oga::XML::Document]

# File lib/oga/xml/parser.rb, line 256
# Builds the document node and distributes the parsed top-level nodes
# over it: a doctype and an XML declaration are stored in their dedicated
# slots, everything else is appended to the document's children.
#
# The document type is :html when the lexer reports HTML mode, otherwise
# :xml.
#
# @param [Array] children
# @return [Oga::XML::Document]
def on_document(children = [])
  document = Document.new(type: @lexer.html? ? :html : :xml)

  children.each do |node|
    case node
    when Doctype
      document.doctype = node
    when XmlDeclaration
      document.xml_declaration = node
    else
      document.children << node
    end
  end

  document
end
on_element(namespace, name, attributes = {}) click to toggle source

@param [String] namespace @param [String] name @param [Hash] attributes @return [Oga::XML::Element]

# File lib/oga/xml/parser.rb, line 320
# Builds an element node from a namespace name, element name and
# attributes.
#
# @param [String] namespace
# @param [String] name
# @param [Hash] attributes
# @return [Oga::XML::Element]
def on_element(namespace, name, attributes = {})
  Element.new(namespace_name: namespace, name: name, attributes: attributes)
end
on_element_children(element, children = []) click to toggle source

@param [Oga::XML::Element] element @param [Array] children @return [Oga::XML::Element]

# File lib/oga/xml/parser.rb, line 333
# Assigns the given children to the element and returns the same element.
#
# @param [Oga::XML::Element] element
# @param [Array] children
# @return [Oga::XML::Element]
def on_element_children(element, children = [])
  element.tap { |node| node.children = children }
end
on_proc_ins(name, text = nil) click to toggle source

@param [String] name @param [String] text @return [Oga::XML::ProcessingInstruction]

# File lib/oga/xml/parser.rb, line 294
# Builds a processing instruction node from a name and optional text.
#
# @param [String] name
# @param [String] text
# @return [Oga::XML::ProcessingInstruction]
def on_proc_ins(name, text = nil)
  ProcessingInstruction.new(name: name, text: text)
end
on_text(text) click to toggle source

@param [String] text @return [Oga::XML::Text]

# File lib/oga/xml/parser.rb, line 312
# Builds a text node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Text]
def on_text(text)
  Text.new(text: text)
end
on_xml_decl(attributes = []) click to toggle source

@param [Array] attributes @return [Oga::XML::XmlDeclaration]

# File lib/oga/xml/parser.rb, line 300
# Builds an XML declaration node. Each attribute becomes one option whose
# key is the attribute name converted to a Symbol and whose value is the
# attribute value.
#
# @param [Array] attributes
# @return [Oga::XML::XmlDeclaration]
def on_xml_decl(attributes = [])
  options = attributes.each_with_object({}) do |attribute, collected|
    collected[attribute.name.to_sym] = attribute.value
  end

  XmlDeclaration.new(options)
end
parser_error(stack_type, stack_value, token_type, token_value) click to toggle source

@param [Fixnum] stack_type @param [Fixnum] stack_value @param [Symbol] token_type @param [String] token_value

# File lib/oga/xml/parser.rb, line 236
# Raises an `LL::ParserError` describing an unexpected token, with the
# current line number appended to the message.
#
# The message depends on what `id_to_type(stack_type)` returns:
#
# * `:rule`: names the unexpected token type and the rule value.
# * `:terminal`: names the unexpected and the expected token, using the
#   labels from TOKEN_ERROR_MAPPING when one exists.
# * `:eof`: reports an unexpected end of input.
# * anything else: falls back to naming the unexpected token. Without this
#   branch the message would be nil and appending the line number would
#   raise a NoMethodError instead of the intended parser error.
#
# @param [Integer] stack_type
# @param [Integer] stack_value
# @param [Symbol] token_type
# @param [String] token_value Not used when building the message.
# @raise [LL::ParserError] Always.
def parser_error(stack_type, stack_value, token_type, token_value)
  message =
    case id_to_type(stack_type)
    when :rule
      "Unexpected #{token_type} for rule #{stack_value}"
    when :terminal
      expected = id_to_terminal(stack_value)
      expected = TOKEN_ERROR_MAPPING[expected]   || expected
      got      = TOKEN_ERROR_MAPPING[token_type] || token_type

      "Unexpected #{got}, expected #{expected} instead"
    when :eof
      'Unexpected end of input'
    else
      got = TOKEN_ERROR_MAPPING[token_type] || token_type

      "Unexpected #{got}"
    end

  raise LL::ParserError, "#{message} on line #{@line}"
end