class Oga::XML::Parser
DOM parser for both XML and HTML.
This parser does not produce a dedicated AST, instead it emits XML
nodes directly. Basic usage of this parser is as follows:
    parser = Oga::XML::Parser.new('<foo></foo>')
    document = parser.parse
To enable HTML parsing you'd use the following instead:
    parser = Oga::XML::Parser.new('<foo></foo>', :html => true)
    document = parser.parse
In both cases you can use either a String or an IO as the parser input. IO instances will result in lower memory overhead, especially when parsing large files.
Constants
- CONFIG
- TOKEN_ERROR_MAPPING
Hash mapping token types and dedicated error labels.
@return [Hash]
Public Class Methods
# Stores the input, creates the lexer for it and resets the lexer's native
# state. Line tracking starts at 1.
#
# @param [String|IO] data The input to parse.
# @param [Hash] options Passed through to the lexer unchanged.
# @see [Oga::XML::Lexer#initialize]
#
# File lib/oga/xml/parser.rb, line 212
def initialize(data, options = {})
  @data = data
  @line = 1

  @lexer = Lexer.new(data, options)
  @lexer.reset_native
end
Public Instance Methods
# File lib/oga/xml/parser.rb, line 362
#
# Rule callback: hands the first value to {#on_document}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_document}.
def _rule_0(val)
  children = val.first

  on_document(children)
end
# File lib/oga/xml/parser.rb, line 366
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_1(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 410
#
# Rule callback: combines every element of the first value using `+`.
#
# @param [Array] val Values handed to this rule callback; the first entry
#   must be enumerable.
def _rule_10(val)
  parts = val.first

  parts.reduce(:+)
end
# File lib/oga/xml/parser.rb, line 414
#
# Rule callback: hands the second value to {#on_cdata}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_cdata}.
def _rule_11(val)
  _, body = val

  on_cdata(body)
end
# File lib/oga/xml/parser.rb, line 418
#
# Rule callback: returns the first value with the second appended via `+`.
#
# @param [Array] val Values handed to this rule callback.
def _rule_12(val)
  head, tail = val

  head + tail
end
# File lib/oga/xml/parser.rb, line 422
#
# Rule callback: ignores its input and returns an empty String.
#
# @param [Array] val Unused.
# @return [String]
def _rule_13(val)
  ''
end
# File lib/oga/xml/parser.rb, line 426
#
# Rule callback: hands the second value to {#on_comment}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_comment}.
def _rule_14(val)
  _, body = val

  on_comment(body)
end
# File lib/oga/xml/parser.rb, line 430
#
# Rule callback: returns the first value with the second appended via `+`.
#
# @param [Array] val Values handed to this rule callback.
def _rule_15(val)
  head, tail = val

  head + tail
end
# File lib/oga/xml/parser.rb, line 434
#
# Rule callback: ignores its input and returns an empty String.
#
# @param [Array] val Unused.
# @return [String]
def _rule_16(val)
  ''
end
# File lib/oga/xml/parser.rb, line 438
#
# Rule callback: hands the second and third values to {#on_proc_ins} as the
# instruction name and text.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_proc_ins}.
def _rule_17(val)
  _, name, text = val

  on_proc_ins(name, text)
end
# File lib/oga/xml/parser.rb, line 444
#
# Rule callback: returns the first value with the second appended via `+`.
#
# @param [Array] val Values handed to this rule callback.
def _rule_18(val)
  head, tail = val

  head + tail
end
# File lib/oga/xml/parser.rb, line 448
#
# Rule callback: ignores its input and returns an empty String.
#
# @param [Array] val Unused.
# @return [String]
def _rule_19(val)
  ''
end
# File lib/oga/xml/parser.rb, line 370
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_2(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 452
#
# Rule callback: returns a two-element Array holding nil followed by the
# first value.
#
# @param [Array] val Values handed to this rule callback.
# @return [Array]
def _rule_20(val)
  [nil, val.first]
end
# File lib/oga/xml/parser.rb, line 456
#
# Rule callback: returns the received values as-is (the same object).
#
# @param [Array] val Values handed to this rule callback.
# @return [Array]
def _rule_21(val)
  val
end
# File lib/oga/xml/parser.rb, line 460
#
# Rule callback: calls {#on_element} with the first two entries of the first
# value (namespace and name) and the second value (attributes).
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_element}.
def _rule_22(val)
  name_parts, attributes = val

  on_element(name_parts[0], name_parts[1], attributes)
end
# File lib/oga/xml/parser.rb, line 466
#
# Rule callback: when the first value is truthy, passes it and the second
# value to {#on_element_children}; always finishes with {#after_element}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#after_element}.
def _rule_23(val)
  element, children = val

  on_element_children(element, children) if element

  after_element(element)
end
# File lib/oga/xml/parser.rb, line 476
#
# Rule callback: hands the first value to {#on_attributes}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_attributes}.
def _rule_24(val)
  attrs = val.first

  on_attributes(attrs)
end
# File lib/oga/xml/parser.rb, line 480
#
# Rule callback: builds an attribute via {#on_attribute}. The values arrive
# as (namespace name, name, value) and are passed on as
# (name, namespace name, value).
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_attribute}.
def _rule_25(val)
  ns_name, name, value = val

  on_attribute(name, ns_name, value)
end
# File lib/oga/xml/parser.rb, line 484
#
# Rule callback: builds an attribute via {#on_attribute} with a nil
# namespace name, using the first value as name and the second as value.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_attribute}.
def _rule_26(val)
  name, value = val

  on_attribute(name, nil, value)
end
# File lib/oga/xml/parser.rb, line 488
#
# Rule callback: hands the second value to {#on_xml_decl}.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_xml_decl}.
def _rule_27(val)
  _, attributes = val

  on_xml_decl(attributes)
end
# File lib/oga/xml/parser.rb, line 492
#
# Rule callback: builds a text node via {#on_text}. The text is the first
# value, with the second appended via `+` when the second is truthy.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_text}.
def _rule_28(val)
  head, tail = val

  text = head
  text += tail if tail

  on_text(text)
end
# File lib/oga/xml/parser.rb, line 500
#
# Rule callback: returns the first value, with the second appended via `+`
# when the second is truthy.
#
# @param [Array] val Values handed to this rule callback.
def _rule_29(val)
  head, tail = val

  return head unless tail

  head + tail
end
# File lib/oga/xml/parser.rb, line 374
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_3(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 504
#
# Rule callback: ignores its input and returns nil.
#
# @param [Array] val Unused.
# @return [NilClass]
def _rule_30(val)
  nil
end
# File lib/oga/xml/parser.rb, line 508
#
# Rule callback: returns the second value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_31(val)
  _, second = val

  second
end
# File lib/oga/xml/parser.rb, line 512
#
# Rule callback: returns the second value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_32(val)
  _, second = val

  second
end
# File lib/oga/xml/parser.rb, line 516
#
# Rule callback: returns the first value with the second appended via `+`.
#
# @param [Array] val Values handed to this rule callback.
def _rule_33(val)
  head, tail = val

  head + tail
end
# File lib/oga/xml/parser.rb, line 520
#
# Rule callback: ignores its input and returns an empty String.
#
# @param [Array] val Unused.
# @return [String]
def _rule_34(val)
  ''
end
# File lib/oga/xml/parser.rb, line 524
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_35(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 528
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_36(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 532
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_37(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 536
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_38(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 540
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_39(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 378
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_4(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 544
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_40(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 548
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_41(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 552
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_42(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 382
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_5(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 386
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_6(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 390
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_7(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 394
#
# Rule callback: returns the first value unchanged.
#
# @param [Array] val Values handed to this rule callback.
def _rule_8(val)
  val.first
end
# File lib/oga/xml/parser.rb, line 398
#
# Rule callback: builds a doctype via {#on_doctype}. Values two through six
# become the :name, :type, :public_id, :system_id and :inline_rules options;
# the first value is not used.
#
# @param [Array] val Values handed to this rule callback.
# @return The result of {#on_doctype}.
def _rule_9(val)
  _, name, type, public_id, system_id, inline_rules = val

  on_doctype(
    :name         => name,
    :type         => type,
    :public_id    => public_id,
    :system_id    => system_id,
    :inline_rules => inline_rules
  )
end
# Hook invoked once an element has been handled; returns the element
# untouched.
#
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element]
#
# File lib/oga/xml/parser.rb, line 341
def after_element(element)
  element
end
# Yields the next token from the lexer.
#
# Every token is yielded as a `[type, value]` pair. Whenever the lexer
# reports a truthy line number it is remembered in `@line`. After the lexer
# is done a final `[-1, -1]` pair is yielded (presumably the end-of-input
# marker expected by the parser driver -- confirm).
#
# @yieldparam [Array]
#
# File lib/oga/xml/parser.rb, line 222
def each_token
  @lexer.advance do |type, value, line|
    line && (@line = line)

    token = [type, value]
    yield token
  end

  yield [-1, -1]
end
# Creates an attribute from a name, an optional namespace name and an
# optional value.
#
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
#
# File lib/oga/xml/parser.rb, line 349
def on_attribute(name, ns_name = nil, value = nil)
  Attribute.new(namespace_name: ns_name, name: name, value: value)
end
# Hook for a collected list of attributes; returns the list untouched.
#
# @param [Array] attrs
# @return [Array] The same object that was passed in.
#
# File lib/oga/xml/parser.rb, line 358
def on_attributes(attrs)
  attrs
end
# Creates a CDATA node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Cdata]
#
# File lib/oga/xml/parser.rb, line 281
def on_cdata(text = nil)
  Cdata.new(text: text)
end
# Creates a comment node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Comment]
#
# File lib/oga/xml/parser.rb, line 287
def on_comment(text = nil)
  Comment.new(text: text)
end
# Creates a doctype node, handing the options Hash to `Doctype.new` as-is.
#
# @param [Hash] options
# @return The newly created `Doctype` instance.
#
# File lib/oga/xml/parser.rb, line 275
def on_doctype(options = {})
  Doctype.new(options)
end
# Builds the document and distributes the given nodes over it.
#
# The document type is :html when the lexer reports HTML mode and :xml
# otherwise. A Doctype child becomes the document's doctype, an
# XmlDeclaration child becomes its XML declaration, and every other child
# is appended to the document's children.
#
# @param [Array] children
# @return [Oga::XML::Document]
#
# File lib/oga/xml/parser.rb, line 256
def on_document(children = [])
  kind     = @lexer.html? ? :html : :xml
  document = Document.new(:type => kind)

  children.each do |child|
    case child
    when Doctype
      document.doctype = child
    when XmlDeclaration
      document.xml_declaration = child
    else
      document.children << child
    end
  end

  document
end
# Creates an element from a namespace name, a name and its attributes.
#
# @param [String] namespace
# @param [String] name
# @param [Hash] attributes
# @return [Oga::XML::Element]
#
# File lib/oga/xml/parser.rb, line 320
def on_element(namespace, name, attributes = {})
  Element.new(namespace_name: namespace, name: name, attributes: attributes)
end
# Assigns the given children to the element and returns the element.
#
# @param [Oga::XML::Element] element
# @param [Array] children
# @return [Oga::XML::Element]
#
# File lib/oga/xml/parser.rb, line 333
def on_element_children(element, children = [])
  element.tap { |node| node.children = children }
end
# Creates a processing instruction from a name and optional text.
#
# @param [String] name
# @param [String] text
# @return [Oga::XML::ProcessingInstruction]
#
# File lib/oga/xml/parser.rb, line 294
def on_proc_ins(name, text = nil)
  ProcessingInstruction.new(name: name, text: text)
end
# Creates a text node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Text]
#
# File lib/oga/xml/parser.rb, line 312
def on_text(text)
  Text.new(text: text)
end
# Creates an XML declaration from a list of attributes.
#
# Each attribute's name (converted to a Symbol) becomes an option key and
# its value the option value; a later attribute with the same name
# overwrites an earlier one.
#
# @param [Array] attributes
# @return [Oga::XML::XmlDeclaration]
#
# File lib/oga/xml/parser.rb, line 300
def on_xml_decl(attributes = [])
  options = attributes.each_with_object({}) do |attr, collected|
    collected[attr.name.to_sym] = attr.value
  end

  XmlDeclaration.new(options)
end
# Raises a parser error describing what was found and, when known, what
# was expected. The message always ends with the current line number.
#
# The stack type is translated with `id_to_type`:
#
# * :rule     - reports the unexpected token type and the rule value.
# * :terminal - reports the token that was found and the one expected,
#   using the labels from TOKEN_ERROR_MAPPING when available.
# * :eof      - reports an unexpected end of input.
#
# Any other stack type yields a generic "Unexpected <token type>" message.
# Without this fallback the message would be nil and appending the line
# number would fail with a NoMethodError instead of a parser error.
#
# @param [Fixnum] stack_type
# @param [Fixnum] stack_value
# @param [Symbol] token_type
# @param [String] token_value Currently unused.
# @raise [LL::ParserError] Always.
#
# File lib/oga/xml/parser.rb, line 236
def parser_error(stack_type, stack_value, token_type, token_value)
  message =
    case id_to_type(stack_type)
    when :rule
      "Unexpected #{token_type} for rule #{stack_value}"
    when :terminal
      expected = id_to_terminal(stack_value)
      expected = TOKEN_ERROR_MAPPING[expected] || expected
      got      = TOKEN_ERROR_MAPPING[token_type] || token_type

      "Unexpected #{got}, expected #{expected} instead"
    when :eof
      'Unexpected end of input'
    else
      "Unexpected #{token_type}"
    end

  raise LL::ParserError, "#{message} on line #{@line}"
end