class SimpleXmlParser::Parser
For the record filter, pass a lambda that takes a record as a parameter and returns true to include it or false to exclude it. For example, to include only records with a “title”, do this: processor.record_filter = ->(rec) { rec['title'] }. If a field name has been changed via the field_name_renames
hash, the new name should be used in the filter.
Constants
- ANSI_GO_TO_LINE_START
Attributes
array_name[R]
Constructor parameters:
current_property_name[RW]
For internal use:
field_name_renames[RW]
User-provided callbacks:
input_record_count[RW]
For internal use:
integer_fields[R]
Constructor parameters:
key_filter[RW]
User-provided callbacks:
output_record_count[RW]
For internal use:
record[RW]
For internal use:
record_filter[RW]
User-provided callbacks:
record_name[R]
Constructor parameters:
records[RW]
For internal use:
start_time[R]
Public Class Methods
new(array_name:, record_name:, integer_fields: nil, key_filter: nil, record_filter: nil, field_name_renames: nil)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 41
#
# Creates a parser and resets all per-parse state.
#
# array_name::         element name whose closing tag triggers #finish
# record_name::        element name that delimits a single record
# integer_fields::     optional collection of input field names whose text is
#                      converted to an Integer (nil disables conversion)
# key_filter::         optional callable given an input field name; a truthy
#                      result keeps the field (nil keeps every field)
# record_filter::      optional callable given a completed record; a truthy
#                      result keeps the record (nil keeps every record)
# field_name_renames:: optional mapping of input field name => output field name
def initialize(array_name:, record_name:, integer_fields: nil, key_filter: nil, record_filter: nil,
               field_name_renames: nil)
  # Structure of the document being parsed.
  @array_name = array_name
  @record_name = record_name
  @integer_fields = integer_fields

  # User-provided callbacks and rename table.
  @key_filter = key_filter
  @record_filter = record_filter
  @field_name_renames = field_name_renames

  # Internal parse state.
  @current_property_name = nil
  @record = {}
  @records = []
  @keys_to_exclude = [] # NOTE(review): not read by any method visible here; confirm it is still needed

  # Progress-reporting state.
  @input_record_count = 0
  @output_record_count = 0
  @start_time = current_time
end
Public Instance Methods
characters(string)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 128
#
# SAX callback for text content. When a field element is currently open and
# the key filter accepts it, stores the text in the current record under the
# field's output name, converting it to an Integer if the field is listed in
# integer_fields.
#
# NOTE(review): each call replaces any value already stored for the key. SAX
# parsers may deliver one element's text in several calls; if that happens
# only the last chunk is kept -- confirm against the Nokogiri SAX docs.
def characters(string)
  field = current_property_name
  return unless field && include_this_field?(field)

  output_key = output_field_name(field)
  record[output_key] = maybe_convert_to_integer(field, string)
end
current_time()
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 67
#
# Returns the current reading of the monotonic clock as a Float number of
# seconds. Only differences between two readings are meaningful, which is how
# the elapsed time in the status line is computed.
def current_time
  # :float_second is the default unit; it is spelled out here for clarity.
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
end_element(name)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 95
#
# SAX callback for a closing tag.
#
# * Closing the array element ends the data: #finish prints the final status.
# * Closing a record element appends the current record to `records` (and
#   bumps output_record_count) when there is no record filter or the filter
#   returns a truthy value, then starts a fresh, empty record either way.
# * Closing any other element means the current field is no longer open.
def end_element(name)
  case name
  when array_name
    finish
  when record_name
    keep_record = record_filter.nil? || record_filter.call(record)
    if keep_record
      self.output_record_count += 1
      records << record
    end
    self.record = {}
  else
    self.current_property_name = nil
  end
end
finish()
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 137
#
# Called when the array element closes: prints the final status line, then a
# newline so that subsequent output starts on a line of its own.
def finish
  output_status
  puts
end
include_this_field?(field_name)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 123
#
# Decides whether a field should be copied into the record. Every field is
# included when no key filter is set; otherwise the filter is called with the
# input field name and its result is returned as-is.
def include_this_field?(field_name)
  return true if key_filter.nil?

  key_filter.call(field_name)
end
maybe_convert_to_integer(field_name, value)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 117
#
# Converts +value+ to an Integer when +field_name+ is listed in
# integer_fields; otherwise returns +value+ unchanged.
#
# String values are parsed strictly as base 10. Without an explicit base,
# Integer() honours radix prefixes, so zero-padded text is misread:
# "010" becomes 8 (octal) and "08" raises ArgumentError.
#
# field_name:: input (pre-rename) field name
# value::      text of the field
#
# Raises ArgumentError if conversion is requested and +value+ is not a valid
# integer.
def maybe_convert_to_integer(field_name, value)
  return value unless integer_fields&.include?(field_name)

  # A base may only be given for String input; anything else keeps the
  # original Integer(value) behaviour.
  value.is_a?(String) ? Integer(value, 10) : Integer(value)
end
output_field_name(input_field_name)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 111
#
# Maps an input field name to the name used as the key in the output record.
# Returns the entry from field_name_renames when one exists; falls back to
# the input name when no rename table is set or it has no entry for the field.
def output_field_name(input_field_name)
  renames = field_name_renames
  renamed = renames.nil? ? nil : renames[input_field_name]
  renamed || input_field_name
end
output_status()
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 72
#
# Prints a one-line progress report: records read so far, records kept, and
# seconds elapsed since the parser was created. No newline is emitted, and
# ANSI_GO_TO_LINE_START is printed first (presumably so that each report
# overwrites the previous one -- the constant's value is not visible here).
def output_status
  print ANSI_GO_TO_LINE_START

  elapsed_seconds = current_time - start_time
  status_line = format(
    "Records processed: %9d kept: %9d Seconds elapsed: %11.2f",
    input_record_count,
    output_record_count,
    elapsed_seconds
  )
  print status_line
end
parse(data_source)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 59
#
# Parses XML with a Nokogiri SAX parser, using this object as the document
# handler, and returns the accumulated records.
#
# data_source:: a String, which is treated as the path of a file to read, or
#               any other object, which is handed to the SAX parser as-is
#
# A file opened here from a path is closed again once parsing ends, including
# when the parser raises. An object supplied by the caller is never closed
# here; it remains the caller's responsibility.
def parse(data_source)
  sax_parser = Nokogiri::XML::SAX::Parser.new(self)

  if data_source.is_a?(String)
    # Block form guarantees the handle is released; File.new leaked it.
    File.open(data_source) { |file| sax_parser.parse(file) }
  else
    sax_parser.parse(data_source)
  end

  records
end
start_element(name, _attrs)
click to toggle source
# File lib/simple_xml_parser/parser.rb, line 82
#
# SAX callback for an opening tag.
#
# * The array element itself needs no action.
# * A record element increments input_record_count and refreshes the status
#   line every 1000 records.
# * Any other element is treated as a field of the current record; its name
#   is remembered so that #characters knows which field the text belongs to.
#
# Element attributes (_attrs) are ignored.
def start_element(name, _attrs)
  case name
  when array_name
    # Nothing to do for the enclosing array element.
  when record_name
    self.input_record_count += 1
    output_status if (input_record_count % 1000).zero?
  else
    self.current_property_name = name
  end
end