class SimpleXmlParser::Parser

For the record filter, pass a lambda that takes a record as its parameter and returns a truthy value to include the record or a falsy value to exclude it. Records are hashes keyed by field name, so to include only records that have a "title", do this: processor.record_filter = ->(rec) { rec['title'] } If a field name has been changed via the field_name_renames hash, the new name should be used in the filter.

Constants

ANSI_GO_TO_LINE_START

Attributes

array_name[R]

Constructor parameters:

current_property_name[RW]

For internal use:

field_name_renames[RW]

User-provided callbacks:

input_record_count[RW]

For internal use:

integer_fields[R]

Constructor parameters:

key_filter[RW]

User-provided callbacks:

output_record_count[RW]

For internal use:

record[RW]

For internal use:

record_filter[RW]

User-provided callbacks:

record_name[R]

Constructor parameters:

records[RW]

For internal use:

start_time[R]

Public Class Methods

new(array_name:, record_name:, integer_fields: nil, key_filter: nil, record_filter: nil, field_name_renames: nil) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 41
# Builds a SAX handler that collects records from an XML document.
#
# array_name::         name of the element whose end tag ends the data
# record_name::        name of the element that delimits one record
# integer_fields::     optional collection of input field names whose text
#                      is converted with Integer()
# key_filter::         optional callable; receives an input field name and
#                      returns truthy to keep that field
# record_filter::      optional callable; receives a finished record and
#                      returns truthy to keep it
# field_name_renames:: optional mapping from input field name to the key
#                      used in the output record
def initialize(array_name:, record_name:, integer_fields: nil,
               key_filter: nil, record_filter: nil, field_name_renames: nil)
  # Constructor parameters describing the document layout.
  @array_name, @record_name, @integer_fields = array_name, record_name, integer_fields

  # User-provided callbacks and rename table.
  @key_filter, @record_filter, @field_name_renames = key_filter, record_filter, field_name_renames

  # Internal parsing state.
  @current_property_name = nil
  @record = {}
  @records = []
  @keys_to_exclude = []
  @input_record_count = @output_record_count = 0

  # Reference point for the elapsed-time figure in the status line.
  @start_time = current_time
end

Public Instance Methods

characters(string) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 128
# SAX callback: receives text found inside an element.
#
# Text is stored only while a field element is open (current_property_name
# is set) and the key filter accepts that field. The value is stored under
# the (possibly renamed) output key, converted to an integer when the field
# is listed in integer_fields.
#
# NOTE(review): each call overwrites whatever is already stored under the
# key. If the parser delivers one text node in several calls, only the last
# piece is kept -- confirm whether accumulation is needed.
def characters(string)
  field = current_property_name
  return unless field && include_this_field?(field)

  record[output_field_name(field)] = maybe_convert_to_integer(field, string)
end
current_time() click to toggle source
# File lib/simple_xml_parser/parser.rb, line 67
# Reads the monotonic clock. Used as the start reference and for the
# elapsed-time figure in the status line, so wall-clock adjustments do not
# affect the measurement.
def current_time
  clock = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock)
end
end_element(name) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 95
# SAX callback: handles a closing tag.
#
# * closing the array element ends the data: the final status is printed
# * closing a record element keeps the record if there is no record filter
#   or the filter accepts it, then starts a fresh empty record
# * closing any other element ends the current field
def end_element(name)
  case name
  when array_name
    # End of the data: emit the final status line.
    finish
  when record_name
    keep = record_filter.nil? || record_filter.call(record)
    if keep
      self.output_record_count = output_record_count + 1
      records << record
    end
    # Always start the next record from a new hash, kept or not.
    self.record = {}
  else
    # A field element closed; text outside fields is not collected.
    self.current_property_name = nil
  end
end
finish() click to toggle source
# File lib/simple_xml_parser/parser.rb, line 137
# Called at the end of the data: prints the final status line and then a
# newline, so later output starts on a fresh line.
def finish
  output_status
  $stdout.puts
end
include_this_field?(field_name) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 123
# Decides whether a field is stored in the output record.
#
# With no key filter every field is included; otherwise the filter is
# called with the input field name and its result is returned as is.
def include_this_field?(field_name)
  filter = key_filter
  return true if filter.nil?

  filter.call(field_name)
end
maybe_convert_to_integer(field_name, value) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 117
# Converts a field's value to an Integer when the field is listed in
# integer_fields; otherwise returns the value unchanged.
#
# String values are parsed strictly as base 10. Without an explicit base,
# Integer() honours radix prefixes, so zero-padded data such as "010" would
# be read as octal (8) and "08" or "09" would raise.
#
# field_name:: input (pre-rename) field name
# value::      text of the field
#
# Raises ArgumentError when the value is not a valid decimal integer.
def maybe_convert_to_integer(field_name, value)
  return value unless integer_fields&.include?(field_name)

  # Integer() only accepts a base for strings; other values convert as before.
  value.is_a?(String) ? Integer(value, 10) : Integer(value)
end
output_field_name(input_field_name) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 111
# Maps an input field name to the key used in the output record.
#
# Returns the input name unchanged when there is no rename table or the
# table has no (truthy) entry for it.
def output_field_name(input_field_name)
  renames = field_name_renames
  if renames.nil?
    input_field_name
  else
    renames[input_field_name] || input_field_name
  end
end
output_status() click to toggle source
# File lib/simple_xml_parser/parser.rb, line 72
# Rewrites the progress line in place: first prints ANSI_GO_TO_LINE_START,
# then the number of records read, the number kept, and the time elapsed
# since start_time. No trailing newline is written.
def output_status
  template = "Records processed: %9d   kept: %9d    Seconds elapsed: %11.2f"

  print ANSI_GO_TO_LINE_START
  elapsed_seconds = current_time - start_time
  print format(template, input_record_count, output_record_count, elapsed_seconds)
end
parse(data_source) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 59
# Parses an XML source with Nokogiri's SAX parser, using this object as the
# handler, and returns the collected records.
#
# data_source:: a String is treated as a file path; any other object is
#               passed to the SAX parser as is.
#
# A file opened here from a path is closed once parsing ends, even when the
# parser raises. A source supplied by the caller is left open.
def parse(data_source)
  parser = Nokogiri::XML::SAX::Parser.new(self)

  if data_source.is_a?(String)
    # Block form closes the file handle for us.
    File.open(data_source) { |io| parser.parse(io) }
  else
    parser.parse(data_source)
  end

  records
end
start_element(name, _attrs) click to toggle source
# File lib/simple_xml_parser/parser.rb, line 82
# SAX callback: handles an opening tag.
#
# * the array element needs no action on open
# * a record element bumps the input record count and refreshes the status
#   line every 1000 records
# * any other element is treated as a field of the current record, and its
#   name is remembered so that characters() knows where to store the text
def start_element(name, _attrs)
  case name
  when array_name
    nil
  when record_name
    self.input_record_count = input_record_count + 1
    output_status if (input_record_count % 1000).zero?
  else
    self.current_property_name = name
  end
end