class MultiSAX::SAX

The class to handle XML libraries.

Public Class Methods

new() click to toggle source

Constructor. The library list cannot be passed here directly, because you should check the return value of open().

# File lib/multisax.rb, line 26
# Creates a loader with no backend chosen yet: @parser starts out as nil
# and is only filled in once open selects a library.
def initialize
  @parser = nil
end

Public Instance Methods

__init__(obj) click to toggle source
# File lib/multisax.rb, line 50
# Binds the listener that will receive the sax_* callbacks and clears the
# pending tag / attribute buffers.
#
# obj:: the listener object.
# Returns self, so the call can be chained directly after new.
def __init__(obj)
  @saxwrapper_attr = {}
  @saxwrapper_tag = nil
  @obj = obj
  self
end
after_element(ns,tag) click to toggle source
# File lib/multisax.rb, line 173
# Reports the end of an element to the listener's sax_tag_end.
# With a namespace the name is reported as "ns:tag", otherwise as the bare tag.
def after_element(ns,tag)
  prefix = ns ? ns + ':' : ''
  @obj.sax_tag_end(prefix + tag)
end
attr(name,str) click to toggle source
# File lib/multisax.rb, line 72
# Buffers one attribute in @saxwrapper_attr until attrs_done flushes it.
# The attribute name is stored as a String key; returns the stored value.
def attr(name,str)
  key = name.to_s
  @saxwrapper_attr[key] = str
end
attrs_done() click to toggle source
# File lib/multisax.rb, line 81
# Flushes the buffered attributes: as a start tag when an element name has
# been recorded in @saxwrapper_tag, otherwise as the XML declaration.
def attrs_done
  return attrs_done_xmldecl unless @saxwrapper_tag
  attrs_done_normal
end
attrs_done_normal() click to toggle source
# File lib/multisax.rb, line 78
# Emits the buffered element: calls the listener's sax_tag_start with the
# recorded tag (as a String) and the attributes collected so far.
def attrs_done_normal
  tag_name = @saxwrapper_tag.to_s
  @obj.sax_tag_start(tag_name, @saxwrapper_attr)
end
attrs_done_xmldecl() click to toggle source
# File lib/multisax.rb, line 75
# Emits the XML declaration: picks 'version', 'encoding' and 'standalone'
# out of the buffered attributes (nil when absent) and hands them to the
# listener's sax_xmldecl.
def attrs_done_xmldecl
  version, encoding, standalone = @saxwrapper_attr.values_at('version', 'encoding', 'standalone')
  @obj.sax_xmldecl(version, encoding, standalone)
end
cdata(txt) click to toggle source
# File lib/multisax.rb, line 57
# SAX callback: passes the text of a CDATA section to the listener's sax_cdata.
def cdata(txt)
  @obj.sax_cdata(txt)
end
cdata_block(txt) click to toggle source
# File lib/multisax.rb, line 122
# SAX callback: passes the text of a CDATA block to the listener's sax_cdata.
def cdata_block(txt)
  @obj.sax_cdata(txt)
end
character(txt) click to toggle source
# File lib/multisax.rb, line 142
# SAX callback for character data. While the @cdata flag is set the text is
# reported through sax_cdata, otherwise through sax_text.
def character(txt)
  @cdata ? @obj.sax_cdata(txt) : @obj.sax_text(txt)
end
characters(txt) click to toggle source
# File lib/multisax.rb, line 121
# SAX callback: passes character data to the listener's sax_text.
def characters(txt)
  @obj.sax_text(txt)
end
comment(txt) click to toggle source
# File lib/multisax.rb, line 59
# SAX callback: passes the text of a comment to the listener's sax_comment.
def comment(txt)
  @obj.sax_comment(txt)
end
endCdata() click to toggle source
# File lib/multisax.rb, line 152
# Marks the end of a CDATA section by clearing the @cdata flag, so that
# character reports subsequent text through sax_text again.
def endCdata
  @cdata = false
end
endElement(tag) click to toggle source
# File lib/multisax.rb, line 139
# SAX callback: reports a closing tag to the listener's sax_tag_end, passing
# the tag through unchanged.
def endElement(tag)
  @obj.sax_tag_end(tag)
end
end_element(tag) click to toggle source
# File lib/multisax.rb, line 56
# SAX callback: reports a closing tag to the listener's sax_tag_end.
# The tag is converted with to_s first, so a Symbol tag arrives as a String.
def end_element(tag)
  tag_name = tag.to_s
  @obj.sax_tag_end(tag_name)
end
error(s,i,j) click to toggle source
# File lib/multisax.rb, line 84
# Error callback. Only messages ending in 'closed but not opened' are acted
# on: they set @after_error, which start_element consults for the next
# element. The i and j arguments are ignored.
# Returns true when the flag was set, nil otherwise.
def error(s,i,j)
  return unless s.end_with?('closed but not opened')
  @after_error = true
end
on_cdata(txt) click to toggle source
# File lib/multisax.rb, line 178
# SAX callback: passes the text of a CDATA section to the listener's sax_cdata.
def on_cdata(txt)
  @obj.sax_cdata(txt)
end
on_cdata_block(txt) click to toggle source
# File lib/multisax.rb, line 101
# SAX callback: passes the text of a CDATA block to the listener's sax_cdata.
def on_cdata_block(txt)
  @obj.sax_cdata(txt)
end
on_characters(txt) click to toggle source
# File lib/multisax.rb, line 100
# SAX callback: passes character data to the listener's sax_text.
def on_characters(txt)
  @obj.sax_text(txt)
end
on_comment(txt) click to toggle source
# File lib/multisax.rb, line 102
# SAX callback: passes the text of a comment to the listener's sax_comment.
def on_comment(txt)
  @obj.sax_comment(txt)
end
on_element(ns,tag,attrs) click to toggle source
# File lib/multisax.rb, line 168
# Reports the start of an element to the listener's sax_tag_start.
#
# ns::    namespace prefix or nil; when present the name becomes "ns:tag".
# tag::   element name.
# attrs:: attributes; flattened one level and rebuilt as a Hash, so both a
#         Hash and an Array of [name, value] pairs are accepted.
# Returns the (possibly prefixed) tag name.
def on_element(ns,tag,attrs)
  prefix = ns ? ns + ':' : ''
  tag_name = prefix + tag
  flat = attrs.flatten(1)
  @obj.sax_tag_start(tag_name, Hash[*flat])
  tag_name
end
on_end_element(tag) click to toggle source
# File lib/multisax.rb, line 99
# SAX callback: reports a closing tag to the listener's sax_tag_end.
def on_end_element(tag)
  @obj.sax_tag_end(tag)
end
on_start_element(tag,attrs) click to toggle source
# File lib/multisax.rb, line 98
# SAX callback: reports an opening tag and its attributes, unchanged, to the
# listener's sax_tag_start.
def on_start_element(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
on_text(txt) click to toggle source
# File lib/multisax.rb, line 177
# SAX callback: passes text content to the listener's sax_text.
def on_text(txt)
  @obj.sax_text(txt)
end
on_xml_decl(args) click to toggle source
# File lib/multisax.rb, line 180
# Reports the XML declaration to the listener's sax_xmldecl.
# args is flattened one level and rebuilt as a Hash; its 'version',
# 'encoding' and 'standalone' entries are passed on (nil when missing).
def on_xml_decl(args)
  decl = Hash[*args.flatten(1)]
  version, encoding, standalone = %w[version encoding standalone].map { |key| decl[key] }
  @obj.sax_xmldecl(version, encoding, standalone)
end
open(*list) click to toggle source

Library loader. Arguments are a list (or an Array) of libraries.

If list is empty or :XML, the following are searched (ordered by speed):
:ox, :libxml, :xmlparser, :nokogiri, :oga, :rexmlstream, :rexmlsax2
If list is :HTML, the following are searched (ordered by speed):
:oxhtml, :nokogirihtml, :ogahtml
You can also specify libraries individually.
If multiple libraries are specified, MultiSAX tries them one by one and uses the first usable one.
# File lib/multisax.rb, line 37
# Library loader: picks the first usable SAX backend from the given list and
# builds the matching helper class in @saxhelper.
#
# list:: library names, given either as separate arguments or wrapped in an
#        Array (nested Arrays are flattened), e.g. open(:ox,:libxml) or
#        open([:ox,:libxml]).
#        * empty or :XML -> XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT
#        * :HTML         -> HTML_PARSERS_INSTALLABLE
#        * otherwise the named libraries are tried in the given order.
#
# A library whose require raises LoadError is skipped; unknown names are ignored.
# Returns the chosen library symbol, or nil when none could be loaded.
# If a parser is already selected it is returned unchanged.
def open(*list)
        return @parser if @parser
        # An Array argument arrives as [[...]] through the splat. Flatten it so that
        # open([:ox]) and open([:HTML]) behave like open(:ox) and open(:HTML).
        list=list.flatten
        list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
        list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
        list.each{|e_module|
                case e_module
                        when :ox,:oxhtml
                                begin
                                        require 'ox'
                                        require 'stringio' #this should be standard module.
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new(::Ox::Sax){
                                        def __init__(obj)
                                                @obj=obj
                                                @saxwrapper_tag=nil
                                                @saxwrapper_attr={}
                                                self
                                        end
                                        def end_element(tag) @obj.sax_tag_end(tag.to_s) end
                                        def cdata(txt) @obj.sax_cdata(txt) end
                                        def text(txt) @obj.sax_text(txt) end
                                        def comment(txt) @obj.sax_comment(txt) end

                                        def start_element(tag)
                                                if @after_error
                                                        @obj.sax_tag_start(tag.to_s,{})
                                                        @after_error=false
                                                else
                                                        # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
                                                        #alias :attrs_done :attrs_done_normal
                                                        @saxwrapper_tag=tag
                                                        @saxwrapper_attr={}
                                                end
                                        end
                                        def attr(name,str)
                                                @saxwrapper_attr[name.to_s]=str
                                        end
                                        def attrs_done_xmldecl
                                                @obj.sax_xmldecl(@saxwrapper_attr['version'],@saxwrapper_attr['encoding'],@saxwrapper_attr['standalone'])
                                        end
                                        def attrs_done_normal
                                                @obj.sax_tag_start(@saxwrapper_tag.to_s,@saxwrapper_attr)
                                        end
                                        def attrs_done
                                                @saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
                                        end
                                        def error(s,i,j) @after_error=true if s.end_with?('closed but not opened') end
                                }
                                break
                        when :libxml
                                begin
                                        require 'libxml'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new{
                                        include ::LibXML::XML::SaxParser::Callbacks
                                        def __init__(obj)
                                                @obj=obj
                                                self
                                        end
                                        def on_start_element(tag,attrs) @obj.sax_tag_start(tag,attrs) end
                                        def on_end_element(tag) @obj.sax_tag_end(tag) end
                                        def on_characters(txt) @obj.sax_text(txt) end
                                        def on_cdata_block(txt) @obj.sax_cdata(txt) end
                                        def on_comment(txt) @obj.sax_comment(txt) end
                                        #actually unused
                                        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                }
                                break
                        when :nokogiri,:nokogirihtml
                                #nokogiri 1.5.x are supported on Ruby 1.8.7.
                                #next if RUBY_VERSION<'1.9'
                                begin
                                        require 'nokogiri'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new(::Nokogiri::XML::SAX::Document){
                                        def __init__(obj)
                                                @obj=obj
                                                self
                                        end
                                        def start_element(tag,attrs) @obj.sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
                                        def end_element(tag) @obj.sax_tag_end(tag) end
                                        def characters(txt) @obj.sax_text(txt) end
                                        def cdata_block(txt) @obj.sax_cdata(txt) end
                                        def comment(txt) @obj.sax_comment(txt) end
                                        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                }
                                break
                        when :xmlparser
                                begin
                                        require 'xml/saxdriver'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new(::XML::Parser){
                                        def __init__(obj)
                                                @obj=obj
                                                @cdata=false
                                                self
                                        end
                                        def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
                                        def endElement(tag) @obj.sax_tag_end(tag) end
                                        def comment(txt) @obj.sax_comment(txt) end
                                        def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                        def character(txt)
                                                if @cdata
                                                        @obj.sax_cdata(txt)
                                                else
                                                        @obj.sax_text(txt)
                                                end
                                        end
                                        def startCdata
                                                @cdata=true
                                        end
                                        def endCdata
                                                @cdata=false
                                        end
                                }
                                break
                        when :oga,:ogahtml
                                next if RUBY_VERSION<'1.9'
                                begin
                                        require 'oga'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new{
                                        def __init__(obj)
                                                @obj=obj
                                                self
                                        end
                                        def on_element(ns,tag,attrs)
                                                tag_name=(ns ? (ns+':') : '')+tag
                                                @obj.sax_tag_start(tag_name,Hash[*attrs.flatten(1)])
                                                return tag_name
                                        end
                                        def after_element(ns,tag)
                                                tag_name=(ns ? (ns+':') : '')+tag
                                                @obj.sax_tag_end(tag_name)
                                        end
                                        def on_text(txt) @obj.sax_text(txt) end
                                        def on_cdata(txt) @obj.sax_cdata(txt) end
                                        def on_comment(txt) @obj.sax_comment(txt) end
                                        def on_xml_decl(args)
                                                attrs=Hash[*args.flatten(1)]
                                                @obj.sax_xmldecl(attrs['version'],attrs['encoding'],attrs['standalone'])
                                        end
                                }
                                break
                        # :nocov:
                        when :xerces
                                begin
                                        require 'Xerces'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new(::XercesR::DocumentHandler){
                                        def __init__(obj)
                                                @obj=obj
                                                #@cdata=false
                                                self
                                        end
                                        def startElement(tag,attrs) @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)]) end
                                        def endElement(tag) @obj.sax_tag_end(tag) end
                                        def comment(txt) @obj.sax_comment(txt) end
                                        #def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                        #def notationDecl(name, publicId, systemId) end
                                        # NOTE(review): with the @cdata tracking commented out, both sax_cdata and
                                        # sax_text fire for every chunk -- confirm this is intended.
                                        def characters(txt,len)
                                                #if @cdata
                                                        @obj.sax_cdata(txt)
                                                #else
                                                        @obj.sax_text(txt)
                                                #end
                                        end
                                        #def startCdata
                                        #        @cdata=true
                                        #end
                                        #def endCdata
                                        #        @cdata=false
                                        #end
                                }
                                break
                        # :nocov:
                        when :rexmlstream
                                begin
                                        require 'rexml/parsers/baseparser'
                                        require 'rexml/parsers/streamparser'
                                        require 'rexml/streamlistener'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new{
                                        include ::REXML::StreamListener
                                        def __init__(obj)
                                                @obj=obj
                                                self
                                        end
                                        def tag_start(tag,attrs) @obj.sax_tag_start(tag,attrs) end
                                        def tag_end(tag) @obj.sax_tag_end(tag) end
                                        def text(txt) @obj.sax_text(txt) end
                                        def cdata(txt) @obj.sax_cdata(txt) end
                                        def comment(txt) @obj.sax_comment(txt) end
                                        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                }
                                break
                        when :rexmlsax2
                                begin
                                        require 'rexml/parsers/sax2parser'
                                        require 'rexml/sax2listener'
                                rescue LoadError;next end
                                @parser=e_module
                                @saxhelper=Class.new{
                                        include ::REXML::SAX2Listener
                                        def __init__(obj)
                                                @obj=obj
                                                self
                                        end
                                        def start_element(uri,tag,qname,attrs) @obj.sax_tag_start(qname,attrs) end
                                        def end_element(uri,tag,qname) @obj.sax_tag_end(qname) end
                                        def characters(txt) @obj.sax_text(txt) end
                                        def cdata(txt) @obj.sax_cdata(txt) end
                                        def comment(txt) @obj.sax_comment(txt) end
                                        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
                                }
                                break
                end
        }
        return @parser
end
parse(source,listener) click to toggle source

The main parsing method. Listener can be Class.new{include MultiSAX::Callbacks}.new. Returns the listener after SAX is applied. If you have not called open(), this will call it using default value (all libraries).

Since 0.0.1, source can be an IO as well as a String.
SAX's listeners are usually modified destructively.
So instances shouldn't be provided.
# File lib/multisax.rb, line 275
# The main parsing method: feeds source to the backend selected in @parser and
# delivers the SAX events to listener through a fresh @saxhelper instance.
#
# source::   a String, or any other object (treated as an IO and handed to
#            the backend as is; read into a String first for :nokogirihtml
#            and :xerces).
# listener:: the object receiving the sax_* callbacks.
#
# Calls open with no arguments when no parser has been selected yet and raises
# a RuntimeError when that fails. An unrecognised @parser value parses nothing.
# Returns listener.
def parse(source,listener)
  unless @parser || open
    raise "Failed to open SAX library. REXML, which is a standard Ruby module, might be also corrupted."
  end
  saxhelper = @saxhelper.new.__init__(listener)
  from_string = source.is_a?(String)
  case @parser
  when :ox
    Ox.sax_parse(saxhelper, from_string ? StringIO.new(source) : source, :convert_special => true)
  when :oxhtml
    Ox.sax_parse(saxhelper, from_string ? StringIO.new(source) : source, :convert_special => true, :smart => true)
  when :libxml
    parser = from_string ? LibXML::XML::SaxParser.string(source) : LibXML::XML::SaxParser.io(source)
    parser.callbacks = saxhelper
    parser.parse
  when :nokogiri
    Nokogiri::XML::SAX::Parser.new(saxhelper).parse(source)
  when :nokogirihtml
    # fixme: nokogirihtml IO doesn't allow errors, so an IO source is read into a String.
    Nokogiri::HTML::SAX::Parser.new(saxhelper).parse(from_string ? source : source.read)
  when :xmlparser
    saxhelper.parse(source)
  when :oga
    Oga::XML::SaxParser.new(saxhelper, source).parse
  when :ogahtml
    Oga::HTML::SaxParser.new(saxhelper, source).parse
  when :xerces
    parser = XercesR::SAXParser.new
    parser.setDocumentHandler(saxhelper)
    parser.parsebuf(from_string ? source : source.read)
  when :rexmlstream
    REXML::Parsers::StreamParser.new(source, saxhelper).parse
  when :rexmlsax2
    parser = REXML::Parsers::SAX2Parser.new(source)
    parser.listen(saxhelper)
    parser.parse
  end
  listener
end
parsefile(filename,listener) click to toggle source

Parses file as XML. Error handling might be changed in the future.

# File lib/multisax.rb, line 313
# Parses the file at filename as XML by opening it in binary mode and
# handing the IO to parse.
#
# Returns nil without parsing when the file is not readable; otherwise
# returns whatever parse returns. Exceptions are not rescued here.
def parsefile(filename,listener)
  return nil unless FileTest.readable?(filename)
  File.open(filename, 'rb') { |io| parse(io, listener) }
end
parser() click to toggle source

Returns which module is actually chosen.

# File lib/multisax.rb, line 267
# Returns the symbol of the library currently selected in @parser
# (nil when none is selected).
def parser
  @parser
end
reset() click to toggle source

Reset MultiSAX state so that you can re-open() another library.

# File lib/multisax.rb, line 265
# Forgets the selected library by clearing @parser, so that a later open
# call performs a fresh selection. Returns nil.
def reset
  @parser = nil
end
startCdata() click to toggle source
# File lib/multisax.rb, line 149
# Marks the start of a CDATA section by setting the @cdata flag, so that
# character reports subsequent text through sax_cdata.
def startCdata
  @cdata = true
end
startElement(tag,attrs) click to toggle source
# File lib/multisax.rb, line 138
# SAX callback: reports an opening tag and its attributes, unchanged, to the
# listener's sax_tag_start.
def startElement(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
start_element(tag) click to toggle source
# File lib/multisax.rb, line 61
# Start-of-element callback.
#
# Normally the tag is only recorded and the attribute buffer is reset; the
# element is reported later by attrs_done. When @after_error is set (see
# error), the element is instead reported immediately with no attributes and
# the flag is cleared.
def start_element(tag)
  unless @after_error
    # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
    #alias :attrs_done :attrs_done_normal
    @saxwrapper_tag = tag
    return @saxwrapper_attr = {}
  end
  @obj.sax_tag_start(tag.to_s, {})
  @after_error = false
end
tag_end(tag) click to toggle source
# File lib/multisax.rb, line 233
# SAX callback: reports a closing tag to the listener's sax_tag_end.
def tag_end(tag)
  @obj.sax_tag_end(tag)
end
tag_start(tag,attrs) click to toggle source
# File lib/multisax.rb, line 232
# SAX callback: reports an opening tag and its attributes, unchanged, to the
# listener's sax_tag_start.
def tag_start(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
text(txt) click to toggle source
# File lib/multisax.rb, line 58
# SAX callback: passes text content to the listener's sax_text.
def text(txt)
  @obj.sax_text(txt)
end
xmlDecl(version,encoding,standalone) click to toggle source
# File lib/multisax.rb, line 141
# SAX callback: reports the XML declaration (version, encoding, standalone)
# to the listener's sax_xmldecl.
def xmlDecl(version,encoding,standalone)
  @obj.sax_xmldecl(version, encoding, standalone)
end
xmldecl(version,encoding,standalone) click to toggle source

actually unused

# File lib/multisax.rb, line 104
# SAX callback: reports the XML declaration (version, encoding, standalone)
# to the listener's sax_xmldecl.
def xmldecl(version,encoding,standalone)
  @obj.sax_xmldecl(version, encoding, standalone)
end