class MultiSAX::SAX
The class to handle XML libraries.
Public Class Methods
new()
click to toggle source
Constructor. The library list cannot be passed here directly, because you should check the return value of open().
# File lib/multisax.rb, line 26
# Starts with no SAX backend selected (@parser is nil).
def initialize
  @parser = nil
end
Public Instance Methods
__init__(obj)
click to toggle source
# File lib/multisax.rb, line 50
# Binds the listener and clears the pending tag/attribute buffer.
# Returns self so the call can be chained after new.
def __init__(obj)
  @obj = obj
  @saxwrapper_tag, @saxwrapper_attr = nil, {}
  self
end
after_element(ns,tag)
click to toggle source
# File lib/multisax.rb, line 173
# Oga callback: reports the end of an element to the listener.
# The tag name is prefixed with "ns:" when a namespace is given.
def after_element(ns,tag)
  prefix = ns ? ns + ':' : ''
  @obj.sax_tag_end(prefix + tag)
end
attr(name,str)
click to toggle source
# File lib/multisax.rb, line 72
# Ox callback: buffers one attribute of the pending tag, keyed by the
# attribute name as a String. Returns the stored value.
def attr(name,str)
  @saxwrapper_attr.store(name.to_s, str)
end
attrs_done()
click to toggle source
# File lib/multisax.rb, line 81
# Ox callback fired once all attributes were delivered. With a pending tag
# the buffered data is reported as a tag start; without one it is treated
# as the XML declaration.
def attrs_done
  if @saxwrapper_tag
    attrs_done_normal
  else
    attrs_done_xmldecl
  end
end
attrs_done_normal()
click to toggle source
# File lib/multisax.rb, line 78
# Reports the buffered tag (name converted to String) together with its
# buffered attributes to the listener's sax_tag_start.
def attrs_done_normal
  tag_name = @saxwrapper_tag.to_s
  @obj.sax_tag_start(tag_name, @saxwrapper_attr)
end
attrs_done_xmldecl()
click to toggle source
# File lib/multisax.rb, line 75
# Reports the buffered attributes as an XML declaration: the 'version',
# 'encoding' and 'standalone' entries are passed in that order (nil when absent).
def attrs_done_xmldecl
  @obj.sax_xmldecl(*@saxwrapper_attr.values_at('version', 'encoding', 'standalone'))
end
cdata(txt)
click to toggle source
# File lib/multisax.rb, line 57
# Ox callback: forwards CDATA text to the listener's sax_cdata.
def cdata(txt)
  @obj.sax_cdata(txt)
end
cdata_block(txt)
click to toggle source
# File lib/multisax.rb, line 122
# Nokogiri callback: forwards a CDATA block to the listener's sax_cdata.
def cdata_block(txt)
  @obj.sax_cdata(txt)
end
character(txt)
click to toggle source
# File lib/multisax.rb, line 142
# xmlparser callback for character data. While inside a CDATA section
# (@cdata set by startCdata) the text goes to sax_cdata, otherwise to sax_text.
def character(txt)
  return @obj.sax_cdata(txt) if @cdata

  @obj.sax_text(txt)
end
characters(txt)
click to toggle source
# File lib/multisax.rb, line 121
# Nokogiri callback: forwards character data to the listener's sax_text.
def characters(txt)
  @obj.sax_text(txt)
end
comment(txt)
click to toggle source
# File lib/multisax.rb, line 59
# Ox callback: forwards a comment's text to the listener's sax_comment.
def comment(txt)
  @obj.sax_comment(txt)
end
endCdata()
click to toggle source
# File lib/multisax.rb, line 152
# xmlparser callback: leaves CDATA mode, so character() reports plain text again.
def endCdata
  @cdata = false
end
endElement(tag)
click to toggle source
# File lib/multisax.rb, line 139
# xmlparser callback: forwards the closing tag name to the listener's sax_tag_end.
def endElement(tag)
  @obj.sax_tag_end(tag)
end
end_element(tag)
click to toggle source
# File lib/multisax.rb, line 56
# Ox callback: reports a closing tag; the name is converted to a String first.
def end_element(tag)
  tag_name = tag.to_s
  @obj.sax_tag_end(tag_name)
end
error(s,i,j)
click to toggle source
# File lib/multisax.rb, line 84
# Ox error callback. Only messages ending in 'closed but not opened' are
# acted on: they set @after_error, which start_element consults for the
# next tag. The i and j arguments are ignored.
def error(s,i,j)
  return unless s.end_with?('closed but not opened')

  @after_error = true
end
on_cdata(txt)
click to toggle source
# File lib/multisax.rb, line 178
# Oga callback: forwards CDATA text to the listener's sax_cdata.
def on_cdata(txt)
  @obj.sax_cdata(txt)
end
on_cdata_block(txt)
click to toggle source
# File lib/multisax.rb, line 101
# libxml callback: forwards a CDATA block to the listener's sax_cdata.
def on_cdata_block(txt)
  @obj.sax_cdata(txt)
end
on_characters(txt)
click to toggle source
# File lib/multisax.rb, line 100
# libxml callback: forwards character data to the listener's sax_text.
def on_characters(txt)
  @obj.sax_text(txt)
end
on_comment(txt)
click to toggle source
# File lib/multisax.rb, line 102
# libxml callback: forwards a comment's text to the listener's sax_comment.
def on_comment(txt)
  @obj.sax_comment(txt)
end
on_element(ns,tag,attrs)
click to toggle source
# File lib/multisax.rb, line 168
# Oga callback: reports an opening element to the listener.
# The tag name is prefixed with "ns:" when a namespace is given, and the
# attribute pairs are converted to a Hash. Returns the composed tag name.
def on_element(ns,tag,attrs)
  prefix = ns ? ns + ':' : ''
  tag_name = prefix + tag
  flat_pairs = attrs.flatten(1)
  @obj.sax_tag_start(tag_name, Hash[*flat_pairs])
  tag_name
end
on_end_element(tag)
click to toggle source
# File lib/multisax.rb, line 99
# libxml callback: forwards the closing tag name to the listener's sax_tag_end.
def on_end_element(tag)
  @obj.sax_tag_end(tag)
end
on_start_element(tag,attrs)
click to toggle source
# File lib/multisax.rb, line 98
# libxml callback: forwards the tag name and its attributes, unchanged,
# to the listener's sax_tag_start.
def on_start_element(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
on_text(txt)
click to toggle source
# File lib/multisax.rb, line 177
# Oga callback: forwards text to the listener's sax_text.
def on_text(txt)
  @obj.sax_text(txt)
end
on_xml_decl(args)
click to toggle source
# File lib/multisax.rb, line 180
# Oga callback for the XML declaration. The name/value pairs are turned
# into a Hash and its 'version', 'encoding' and 'standalone' entries are
# passed to sax_xmldecl in that order (nil when absent).
def on_xml_decl(args)
  decl = Hash[*args.flatten(1)]
  @obj.sax_xmldecl(*decl.values_at('version', 'encoding', 'standalone'))
end
open(*list)
click to toggle source
Library loader. Arguments are list (or Array) of libraries.
If list is empty or :XML, the following are searched (ordered by speed): :ox, :libxml, :xmlparser, :nokogiri, :oga, :rexmlstream, :rexmlsax2. If list is :HTML, the following are searched (ordered by speed): :oxhtml, :nokogirihtml, :ogahtml. You can also specify libraries individually. If multiple are selected, MultiSAX will try the libraries one by one and use the first usable one.
# File lib/multisax.rb, line 37
# Library loader. Tries the requested libraries in order, keeps the first
# one that can be required in @parser, and builds the matching
# callback-adapter class in @saxhelper.
#
# list:: library names, given either as separate arguments or as a single
#        Array (nested Arrays are flattened). An empty list or :XML
#        searches XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT; :HTML
#        searches HTML_PARSERS_INSTALLABLE. Names that match no known
#        library are skipped.
#
# Returns the chosen library Symbol, or nil when none could be loaded.
# If a library is already open it is returned unchanged; call reset first
# to choose another one.
def open(*list)
  return @parser if @parser
  # open([:ox,:libxml]) arrives here as [[:ox,:libxml]]; flatten it so an
  # Array argument behaves exactly like open(:ox,:libxml).
  list=list.flatten
  list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
  list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
  list.each{|e_module|
    case e_module
    when :ox,:oxhtml
      begin
        require 'ox'
        require 'stringio' #this should be standard module.
      rescue LoadError
        next
      end
      @parser=e_module
      # Ox delivers a tag and its attributes through separate callbacks, so
      # they are buffered until attrs_done fires.
      @saxhelper=Class.new(::Ox::Sax){
        def __init__(obj)
          @obj=obj
          @saxwrapper_tag=nil
          @saxwrapper_attr={}
          self
        end
        def end_element(tag) @obj.sax_tag_end(tag.to_s) end
        def cdata(txt) @obj.sax_cdata(txt) end
        def text(txt) @obj.sax_text(txt) end
        def comment(txt) @obj.sax_comment(txt) end
        def start_element(tag)
          if @after_error
            @obj.sax_tag_start(tag.to_s,{})
            @after_error=false
          else
            # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
            #alias :attrs_done :attrs_done_normal
            @saxwrapper_tag=tag
            @saxwrapper_attr={}
          end
        end
        def attr(name,str) @saxwrapper_attr[name.to_s]=str end
        def attrs_done_xmldecl
          @obj.sax_xmldecl(@saxwrapper_attr['version'],@saxwrapper_attr['encoding'],@saxwrapper_attr['standalone'])
        end
        def attrs_done_normal
          @obj.sax_tag_start(@saxwrapper_tag.to_s,@saxwrapper_attr)
        end
        def attrs_done
          @saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
        end
        def error(s,i,j)
          @after_error=true if s.end_with?('closed but not opened')
        end
      }
      break
    when :libxml
      begin
        require 'libxml'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new{
        include ::LibXML::XML::SaxParser::Callbacks
        def __init__(obj)
          @obj=obj
          self
        end
        def on_start_element(tag,attrs) @obj.sax_tag_start(tag,attrs) end
        def on_end_element(tag) @obj.sax_tag_end(tag) end
        def on_characters(txt) @obj.sax_text(txt) end
        def on_cdata_block(txt) @obj.sax_cdata(txt) end
        def on_comment(txt) @obj.sax_comment(txt) end
        #actually unused
        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
      }
      break
    when :nokogiri,:nokogirihtml
      #nokogiri 1.5.x are supported on Ruby 1.8.7.
      #next if RUBY_VERSION<'1.9'
      begin
        require 'nokogiri'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new(::Nokogiri::XML::SAX::Document){
        def __init__(obj)
          @obj=obj
          self
        end
        # attrs is converted to a Hash when it arrives as an Array of pairs.
        def start_element(tag,attrs) @obj.sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
        def end_element(tag) @obj.sax_tag_end(tag) end
        def characters(txt) @obj.sax_text(txt) end
        def cdata_block(txt) @obj.sax_cdata(txt) end
        def comment(txt) @obj.sax_comment(txt) end
        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
      }
      break
    when :xmlparser
      begin
        require 'xml/saxdriver'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new(::XML::Parser){
        def __init__(obj)
          @obj=obj
          @cdata=false
          self
        end
        def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
        def endElement(tag) @obj.sax_tag_end(tag) end
        def comment(txt) @obj.sax_comment(txt) end
        def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
        # @cdata is toggled by startCdata/endCdata and decides where text goes.
        def character(txt)
          if @cdata
            @obj.sax_cdata(txt)
          else
            @obj.sax_text(txt)
          end
        end
        def startCdata
          @cdata=true
        end
        def endCdata
          @cdata=false
        end
      }
      break
    when :oga,:ogahtml
      next if RUBY_VERSION<'1.9'
      begin
        require 'oga'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new{
        def __init__(obj)
          @obj=obj
          self
        end
        def on_element(ns,tag,attrs)
          tag_name=(ns ? (ns+':') : '')+tag
          @obj.sax_tag_start(tag_name,Hash[*attrs.flatten(1)])
          return tag_name
        end
        def after_element(ns,tag)
          tag_name=(ns ? (ns+':') : '')+tag
          @obj.sax_tag_end(tag_name)
        end
        def on_text(txt) @obj.sax_text(txt) end
        def on_cdata(txt) @obj.sax_cdata(txt) end
        def on_comment(txt) @obj.sax_comment(txt) end
        def on_xml_decl(args)
          attrs=Hash[*args.flatten(1)]
          @obj.sax_xmldecl(attrs['version'],attrs['encoding'],attrs['standalone'])
        end
      }
      break
    # :nocov:
    when :xerces
      begin
        require 'Xerces'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new(::XercesR::DocumentHandler){
        def __init__(obj)
          @obj=obj
          #@cdata=false
          self
        end
        def startElement(tag,attrs)
          @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)])
        end
        def endElement(tag) @obj.sax_tag_end(tag) end
        def comment(txt) @obj.sax_comment(txt) end
        #def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
        #def notationDecl(name, publicId, systemId) end
        # NOTE(review): with the if/else commented out, both sax_cdata and
        # sax_text are invoked for every chunk - confirm this is intended.
        def characters(txt,len)
          #if @cdata
            @obj.sax_cdata(txt)
          #else
            @obj.sax_text(txt)
          #end
        end
        #def startCdata
        #  @cdata=true
        #end
        #def endCdata
        #  @cdata=false
        #end
      }
      break
    # :nocov:
    when :rexmlstream
      begin
        require 'rexml/parsers/baseparser'
        require 'rexml/parsers/streamparser'
        require 'rexml/streamlistener'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new{
        include ::REXML::StreamListener
        def __init__(obj)
          @obj=obj
          self
        end
        def tag_start(tag,attrs) @obj.sax_tag_start(tag,attrs) end
        def tag_end(tag) @obj.sax_tag_end(tag) end
        def text(txt) @obj.sax_text(txt) end
        def cdata(txt) @obj.sax_cdata(txt) end
        def comment(txt) @obj.sax_comment(txt) end
        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
      }
      break
    when :rexmlsax2
      begin
        require 'rexml/parsers/sax2parser'
        require 'rexml/sax2listener'
      rescue LoadError
        next
      end
      @parser=e_module
      @saxhelper=Class.new{
        include ::REXML::SAX2Listener
        def __init__(obj)
          @obj=obj
          self
        end
        # Only the qualified name is forwarded; uri and the local tag are dropped.
        def start_element(uri,tag,qname,attrs) @obj.sax_tag_start(qname,attrs) end
        def end_element(uri,tag,qname) @obj.sax_tag_end(qname) end
        def characters(txt) @obj.sax_text(txt) end
        def cdata(txt) @obj.sax_cdata(txt) end
        def comment(txt) @obj.sax_comment(txt) end
        def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
      }
      break
    end
  }
  return @parser
end
parse(source,listener)
click to toggle source
The main parsing method. Listener can be Class.new{include MultiSAX::Callbacks}.new. Returns the listener after SAX is applied. If you have not called open(), this will call it using the default value (all libraries).
Since 0.0.1, source can be an IO as well as a String. SAX listeners are usually modified destructively, so instances shouldn't be provided.
# File lib/multisax.rb, line 275
# Runs the selected SAX backend over source and feeds the events to listener.
#
# source::   a String, or any other object (handed to the backend as a stream).
# listener:: object receiving the sax_* callbacks.
#
# Calls open (with no arguments) when no backend has been selected yet and
# raises a RuntimeError if that fails. Returns listener.
def parse(source,listener)
  unless @parser || open
    raise "Failed to open SAX library. REXML, which is a standard Ruby module, might be also corrupted."
  end
  saxhelper = @saxhelper.new.__init__(listener)
  from_string = source.is_a?(String)
  case @parser
  when :ox
    # Ox is always given a stream, so Strings are wrapped in a StringIO.
    Ox.sax_parse(saxhelper, (from_string ? StringIO.new(source) : source), :convert_special=>true)
  when :oxhtml
    Ox.sax_parse(saxhelper, (from_string ? StringIO.new(source) : source), :convert_special=>true, :smart=>true)
  when :libxml
    parser = from_string ? LibXML::XML::SaxParser.string(source) : LibXML::XML::SaxParser.io(source)
    parser.callbacks = saxhelper
    parser.parse
  when :nokogiri
    Nokogiri::XML::SAX::Parser.new(saxhelper).parse(source)
  when :nokogirihtml
    # fixme: nokogirihtml IO doesn't allow errors.
    parser = Nokogiri::HTML::SAX::Parser.new(saxhelper)
    parser.parse(from_string ? source : source.read)
  when :xmlparser
    saxhelper.parse(source)
  when :oga
    Oga::XML::SaxParser.new(saxhelper, source).parse
  when :ogahtml
    Oga::HTML::SaxParser.new(saxhelper, source).parse
  when :xerces
    parser = XercesR::SAXParser.new
    parser.setDocumentHandler(saxhelper)
    parser.parsebuf(from_string ? source : source.read)
  when :rexmlstream
    REXML::Parsers::StreamParser.new(source, saxhelper).parse
  when :rexmlsax2
    parser = REXML::Parsers::SAX2Parser.new(source)
    parser.listen(saxhelper)
    parser.parse
  end
  listener
end
parsefile(filename,listener)
click to toggle source
Parses file as XML. Error handling might be changed in the future.
# File lib/multisax.rb, line 313
# Parses the file at filename by opening it in binary mode and handing the
# stream to parse.
#
# Returns nil when the file is not readable, otherwise whatever parse
# returns. Errors raised while opening or parsing are not rescued here.
def parsefile(filename,listener)
  if FileTest.readable?(filename)
    File.open(filename, 'rb') { |io| parse(io, listener) }
  end
end
parser()
click to toggle source
Returns which module is actually chosen.
# File lib/multisax.rb, line 267
# Returns the Symbol of the library chosen by open, or nil if none is selected.
def parser
  @parser
end
reset()
click to toggle source
Reset MultiSAX
state so that you can re-open() another library.
# File lib/multisax.rb, line 265
# Forgets the selected library so that a later open can pick another one.
def reset
  @parser = nil
end
startCdata()
click to toggle source
# File lib/multisax.rb, line 149
# xmlparser callback: enters CDATA mode, so character() reports text as CDATA.
def startCdata
  @cdata = true
end
startElement(tag,attrs)
click to toggle source
# File lib/multisax.rb, line 138
# xmlparser callback: forwards the tag name and its attributes, unchanged,
# to the listener's sax_tag_start.
def startElement(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
start_element(tag)
click to toggle source
# File lib/multisax.rb, line 61
# Ox callback for an opening tag.
#
# Normally the tag is only remembered (with an empty attribute buffer), to
# be reported later when the attributes are complete. Right after an error
# flagged by error(), the tag is reported immediately with no attributes
# and the flag is cleared.
def start_element(tag)
  unless @after_error
    # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
    #alias :attrs_done :attrs_done_normal
    @saxwrapper_tag = tag
    return @saxwrapper_attr = {}
  end

  @obj.sax_tag_start(tag.to_s, {})
  @after_error = false
end
tag_end(tag)
click to toggle source
# File lib/multisax.rb, line 233
# REXML stream callback: forwards the closing tag name to the listener's sax_tag_end.
def tag_end(tag)
  @obj.sax_tag_end(tag)
end
tag_start(tag,attrs)
click to toggle source
# File lib/multisax.rb, line 232
# REXML stream callback: forwards the tag name and its attributes,
# unchanged, to the listener's sax_tag_start.
def tag_start(tag,attrs)
  @obj.sax_tag_start(tag, attrs)
end
text(txt)
click to toggle source
# File lib/multisax.rb, line 58
# Ox callback: forwards text to the listener's sax_text.
def text(txt)
  @obj.sax_text(txt)
end
xmlDecl(version,encoding,standalone)
click to toggle source
# File lib/multisax.rb, line 141
# xmlparser callback: forwards the XML declaration's version, encoding and
# standalone values to the listener's sax_xmldecl.
def xmlDecl(version,encoding,standalone)
  @obj.sax_xmldecl(version, encoding, standalone)
end
xmldecl(version,encoding,standalone)
click to toggle source
actually unused
# File lib/multisax.rb, line 104
# Forwards the XML declaration's version, encoding and standalone values to
# the listener's sax_xmldecl. Marked "actually unused" for the libxml helper.
def xmldecl(version,encoding,standalone)
  @obj.sax_xmldecl(version, encoding, standalone)
end