module MultiXml
Each MultiXml parser is expected to parse an XML document into a Hash. The conversion rules are:
-
Each document starts out as an empty Hash.
-
Reading an element creates an entry in the parent Hash whose key is the element name and whose value is a Hash holding the element's attributes as key/value pairs. Child elements are added to that Hash by applying this same rule.
-
Text and CDATA are stored in the parent element Hash with a key of MultiXml::CONTENT_ROOT and a value of the text itself.
-
If a key already exists in the Hash then the value associated with the key is converted to an Array with the old and new value in it.
-
Other elements such as the xml prolog, doctype, and comments are ignored.
Constants
- CONTENT_ROOT
- DEFAULT_OPTIONS
- DISALLOWED_XML_TYPES
- PARSING
- REQUIREMENT_MAP
- TYPE_NAMES
Public Class Methods
The default parser based on what you currently have loaded and installed. First checks to see if any parsers are already loaded, then checks to see which are installed if none are loaded.
# File lib/multi_xml.rb, line 87
# Pick a parser name without any explicit configuration.
#
# An already-loaded library wins, checked in the order Ox, LibXML, Nokogiri,
# Oga. Otherwise each (library, parser) pair in REQUIREMENT_MAP is tried in
# order and the parser of the first library that can be required is returned.
#
# Raises NoParserError when no library is loaded and none can be required.
def default_parser
  already_loaded =
    if defined?(::Ox) then :ox
    elsif defined?(::LibXML) then :libxml
    elsif defined?(::Nokogiri) then :nokogiri
    elsif defined?(::Oga) then :oga
    end
  return already_loaded if already_loaded

  REQUIREMENT_MAP.each do |lib_name, parser_name|
    begin
      require lib_name
    rescue LoadError
      # This library is not installed; try the next candidate.
      next
    end
    return parser_name
  end

  raise(NoParserError.new("No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42."))
end
Parse an XML string or IO into Ruby.
Options
:symbolize_keys
-
If true, will use symbols instead of strings for the keys.
:disallowed_types
-
Types that must not be typecast. Defaults to `['yaml', 'symbol']`. Use `[]` to allow all types.
:typecast_xml_value
-
If true, typecasts the values of the parsed document; if false, the values are returned as parsed.
# File lib/multi_xml.rb, line 133
# Parse an XML string or IO-like object into a Hash.
#
# xml     - a String, an object responding to #read, or nil (treated as '').
# options - merged over DEFAULT_OPTIONS; the keys read here are
#           :typecast_xml_value, :disallowed_types and :symbolize_keys.
#
# Returns {} when the (stripped) input has no characters.
# Re-raises DisallowedTypeError untouched; errors matching the current
# parser's parse_error are re-raised as ParseError with the original
# message and backtrace.
def parse(xml, options = {}) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
  opts = DEFAULT_OPTIONS.merge(options)
  source = xml || ''
  source = source.strip if source.respond_to?(:strip)

  begin
    source = StringIO.new(source) unless source.respond_to?(:read)

    # Peek at the first character so empty input never reaches the parser.
    first_char = source.getc
    return {} if first_char.nil?
    source.ungetc(first_char)

    result = undasherize_keys(parser.parse(source) || {})
    result = typecast_xml_value(result, opts[:disallowed_types]) if opts[:typecast_xml_value]
  rescue DisallowedTypeError
    raise
  rescue parser.parse_error => error
    raise(ParseError, error.message, error.backtrace) # rubocop:disable RaiseArgs
  end

  opts[:symbolize_keys] ? symbolize_keys(result) : result
end
Get the current parser class.
# File lib/multi_xml.rb, line 77
# Return the current parser.
#
# If @parser has never been assigned, the result of default_parser is passed
# through the parser= writer first. An explicitly assigned value, including
# nil, is returned as is.
def parser
  self.parser = default_parser unless defined?(@parser)
  @parser
end
Set the XML parser utilizing a symbol, string, or class. Supported by default are:
-
:libxml
-
:nokogiri
-
:ox
-
:rexml
-
:oga
# File lib/multi_xml.rb, line 112
# Choose the XML parser.
#
# new_parser - a String or Symbol naming a parser, or a Class/Module to use
#              directly.
#
# For a name, "multi_xml/parsers/<downcased name>" is required and the
# constant is looked up in MultiXml::Parsers after capitalizing each
# underscore-separated segment and joining them.
#
# Raises RuntimeError for any other kind of argument.
def parser=(new_parser)
  case new_parser
  when Module # every Class is a Module, so this covers classes as well
    @parser = new_parser
  when String, Symbol
    parser_name = new_parser.to_s
    require "multi_xml/parsers/#{parser_name.downcase}"
    const_name = parser_name.split('_').map(&:capitalize).join('')
    @parser = MultiXml::Parsers.const_get(const_name)
  else
    raise('Did not recognize your parser specification. Please specify either a symbol or a class.')
  end
end
Private Class Methods
# File lib/multi_xml.rb, line 183
# Build a StringIO from Base64-encoded file content.
#
# file   - Base64-encoded String.
# entity - Hash read for the 'name' and 'content_type' keys.
#
# Returns the StringIO, extended with FileLike, with original_filename and
# content_type assigned from entity.
def parse_file(file, entity)
  StringIO.new(Base64.decode64(file)).tap do |io|
    io.extend(FileLike)
    io.original_filename = entity['name']
    io.content_type = entity['content_type']
  end
end
# File lib/multi_xml.rb, line 191
# Recursively convert Hash keys to Symbols.
#
# params - a Hash, an Array, or any other object.
#
# Returns a new Hash with every key converted via #to_sym and every value
# processed recursively; a new Array with each element processed
# recursively; or params itself for anything else. The input is not
# modified.
def symbolize_keys(params)
  case params
  when Hash
    # Fill a single result Hash in place. Merging inside a fold would
    # allocate a fresh Hash for every key, which is quadratic for large
    # documents.
    params.each_with_object({}) do |(key, value), result|
      result[key.to_sym] = symbolize_keys(value)
    end
  when Array
    params.map { |value| symbolize_keys(value) }
  else
    params
  end
end
# File lib/multi_xml.rb, line 218
# Recursively convert a parsed value according to the 'type' and 'nil'
# entries found in its Hashes.
#
# value            - a Hash, Array or String; anything else raises.
# disallowed_types - collection of type names that must not be typecast;
#                    falls back to DISALLOWED_XML_TYPES when nil.
#
# Returns the converted value (see the individual branches below).
# Raises DisallowedTypeError when a Hash carries a non-Hash 'type' that is
# listed in disallowed_types, and RuntimeError for unsupported classes.
#
# NOTE: the input is modified in place in two spots: the 'type' key may be
# deleted / the CONTENT_ROOT entry overwritten on a Hash, and Arrays are
# rewritten with map!.
def typecast_xml_value(value, disallowed_types = nil) # rubocop:disable AbcSize, CyclomaticComplexity, MethodLength, PerceivedComplexity
  disallowed_types ||= DISALLOWED_XML_TYPES

  case value
  when Hash
    # A 'type' that is itself a Hash is a child element named "type", not a
    # type attribute, so it is exempt from the disallowed check.
    if value.include?('type') && !value['type'].is_a?(Hash) && disallowed_types.include?(value['type'])
      raise(DisallowedTypeError.new(value['type']))
    end

    if value['type'] == 'array'

      # this commented-out suggestion helps to avoid the multiple attribute
      # problem, but it breaks when there is only one item in the array.
      #
      # from: https://github.com/jnunemaker/httparty/issues/102
      #
      # _, entries = value.detect { |k, v| k != 'type' && v.is_a?(Array) }

      # This attempt fails to consider the order that the detect method
      # retrieves the entries.
      # _, entries = value.detect {|key, _| key != 'type'}

      # This approach ignores attribute entries that are not convertable
      # to an Array which allows attributes to be ignored.
      _, entries = value.detect { |k, v| k != 'type' && (v.is_a?(Array) || v.is_a?(Hash)) }

      # NOTE(review): because of the detect filter above, entries appears to
      # be only nil, an Array or a Hash here, so the String and else branches
      # look unreachable - confirm before relying on them.
      case entries
      when NilClass
        []
      when String
        [] if entries.strip.empty?
      when Array
        entries.collect { |entry| typecast_xml_value(entry, disallowed_types) }
      when Hash
        # A single child is wrapped so the result is always an Array.
        [typecast_xml_value(entries, disallowed_types)]
      else
        raise("can't typecast #{entries.class.name}: #{entries.inspect}")
      end

    elsif value.key?(CONTENT_ROOT)
      content = value[CONTENT_ROOT]
      # Converter registered for this type in PARSING, if any.
      block = PARSING[value['type']]
      if block
        if block.arity == 1
          # Drops 'type' from the caller's Hash; the guard is always truthy
          # here because block was just looked up with the same key.
          value.delete('type') if PARSING[value['type']]
          if value.keys.size > 1
            # Other entries remain besides the content: keep the Hash and
            # store the converted content back into it.
            value[CONTENT_ROOT] = block.call(content)
            value
          else
            block.call(content)
          end
        else
          # Converters with any other arity also receive the whole Hash.
          block.call(content, value)
        end
      else
        # No converter: return the bare content unless other entries exist.
        value.keys.size > 1 ? value : content
      end
    elsif value['type'] == 'string' && value['nil'] != 'true'
      ''
    # blank or nil parsed values are represented by nil
    elsif value.empty? || value['nil'] == 'true'
      nil
    # If the type is the only element which makes it then
    # this still makes the value nil, except if type is
    # a XML node(where type['value'] is a Hash)
    elsif value['type'] && value.size == 1 && !value['type'].is_a?(Hash)
      nil
    else
      # Plain container: typecast every entry into a new Hash.
      xml_value = value.inject({}) do |hash, (k, v)|
        hash[k] = typecast_xml_value(v, disallowed_types)
        hash
      end

      # Turn {:files => {:file => #<StringIO>} into {:files => #<StringIO>} so it is compatible with
      # how multipart uploaded files from HTML appear
      xml_value['file'].is_a?(StringIO) ? xml_value['file'] : xml_value
    end
  when Array
    # Converts the elements in place, then unwraps arrays of zero or one
    # element (an empty array yields nil via #first).
    value.map! { |i| typecast_xml_value(i, disallowed_types) }
    value.length > 1 ? value : value.first
  when String
    value
  else
    raise("can't typecast #{value.class.name}: #{value.inspect}")
  end
end
# File lib/multi_xml.rb, line 204
# Recursively replace dashes with underscores in Hash keys.
#
# params - a Hash, an Array, or any other object.
#
# Returns a new Hash whose keys are the String form of the original keys
# with every '-' turned into '_' and whose values are processed
# recursively; a new Array with each element processed recursively; or
# params itself for anything else.
def undasherize_keys(params)
  case params
  when Array
    params.map { |element| undasherize_keys(element) }
  when Hash
    params.each_with_object({}) do |(key, value), converted|
      converted[key.to_s.tr('-', '_')] = undasherize_keys(value)
    end
  else
    params
  end
end