class NdrImport::Xml::Table

This class maintains the state of an XML table mapping and encapsulates the logic required to transform a table of data into “records”. Particular attention has been paid to using enumerables throughout, to help with the transformation of large quantities of data.

Constants

XML_OPTIONS

Public Class Methods

all_valid_options() click to toggle source
Calls superclass method NdrImport::Table::all_valid_options
# File lib/ndr_import/xml/table.rb, line 16
# Returns the superclass' valid options with the delimiter/header/footer
# options taken out and XML_OPTIONS appended.
def self.all_valid_options
  inherited_options = super
  excluded_options  = %w[delimiter header_lines footer_lines]

  (inherited_options - excluded_options) + XML_OPTIONS
end

Public Instance Methods

header_lines() click to toggle source
# File lib/ndr_import/xml/table.rb, line 22
# Always returns 0; this table type reports no header lines.
# NOTE(review): presumably because XML input has no header rows - confirm.
def header_lines
  0
end
transform_line(line, index) { |klass, fields, index| ... } click to toggle source

This method transforms an incoming line (element) of xml data by applying each of the klass masked mappings to the line and yielding the klass and fields for each mapped klass.

# File lib/ndr_import/xml/table.rb, line 33
# Transforms one incoming XML element by applying each klass' masked mappings.
#
# Without a block, an Enumerator over the same results is returned. With a
# block, each [klass, fields, index] triple is yielded on its own, unless
# `yield_xml_record` is truthy, in which case all triples for the element are
# yielded once, together, as a single array. Mapped fields whose :skip value
# stringifies to 'true' are left out.
#
# Raises a RuntimeError unless `line` is a Nokogiri::XML::Element.
def transform_line(line, index)
  return enum_for(:transform_line, line, index) unless block_given?
  raise 'Not an Nokogiri::XML::Element!' unless line.is_a? Nokogiri::XML::Element

  masked_mappings = augment_and_validate_column_mappings_for(line)
  cell_values     = xml_line_from(line)
  collected       = []

  masked_mappings.each do |klass, klass_mappings|
    fields = mapped_line(cell_values, klass_mappings)
    next if fields[:skip].to_s == 'true'

    record = [klass, fields, index]
    if yield_xml_record
      # Held back so the whole element can be yielded in one go below
      collected << record
    else
      yield(*record)
    end
  end

  yield(collected.compact) if yield_xml_record
end

Private Instance Methods

augment_and_validate_column_mappings_for(line) click to toggle source
# File lib/ndr_import/xml/table.rb, line 58
# Augments the column mappings for `line`, validates that no xpath is left
# unmapped, then returns the result of running MaskedMappings over a deep copy
# of the augmented columns.
def augment_and_validate_column_mappings_for(line)
  augment_column_mappings_for(line)
  validate_column_mappings(line)

  columns_snapshot = @augmented_columns.deep_dup
  NdrImport::Xml::MaskedMappings.new(@klass, columns_snapshot).call
end
augment_column_mappings_for(line) click to toggle source

Add missing column mappings (and column_xpaths) where repeating sections / data items appear

# File lib/ndr_import/xml/table.rb, line 67
# Adds column mappings (and their xpaths) for repeating sections / data items
# found in `line` that the configured columns do not already cover.
def augment_column_mappings_for(line)
  # Each `line` starts from fresh copies of the configured columns and xpaths;
  # new mappings are appended to these copies only.
  @augmented_columns       = @columns.deep_dup
  @augmented_column_xpaths = column_xpaths.deep_dup

  unmapped_xpaths(line).each do |xpath|
    # A copy is handed over because the lookup strips "[n]" indices in place
    template_column = find_existing_column_for(xpath.dup)
    next unless template_column

    components      = labelled_xpath_components_from(xpath)
    increment_match = /\[(\d+)\]/.match(xpath)
    raise "could not identify klass for #{xpath}" unless increment_match

    mapping_builder = NdrImport::Xml::ColumnMapping.new(
      template_column, components, increment_match[1], line, @klass
    )
    added_column = mapping_builder.call

    @augmented_columns << added_column
    @augmented_column_xpaths << build_xpath_from(added_column)
  end
end
build_xpath_from(column) click to toggle source
# File lib/ndr_import/xml/table.rb, line 191
# Returns the xpath for `column`: just the column name when the column has no
# (or a blank) 'xml_cell', otherwise the path built by relative_path_from.
def build_xpath_from(column)
  name = column_name_from(column)
  return name unless column['xml_cell'].presence

  relative_path_from(column, name)
end
column_name_from(column) click to toggle source
# File lib/ndr_import/xml/table.rb, line 161
# Returns the value stored under Strings::COLUMN, falling back to the value
# under Strings::STANDARD_MAPPING when the former is nil or false.
def column_name_from(column)
  explicit_name = column[Strings::COLUMN]
  return explicit_name if explicit_name

  column[Strings::STANDARD_MAPPING]
end
column_xpaths() click to toggle source
# File lib/ndr_import/xml/table.rb, line 165
# Memoized list of xpaths, one per configured column, in column order.
def column_xpaths
  return @column_xpaths if @column_xpaths

  @column_xpaths = columns.map { |mapping| build_xpath_from(mapping) }
end
find_existing_column_for(unmapped_xpath) click to toggle source
# File lib/ndr_import/xml/table.rb, line 97
# Returns the first configured column whose name, relative path and attribute
# match `unmapped_xpath` once its repeating-section indices are removed, or nil
# when no column matches.
#
# NOTE: `unmapped_xpath` is modified in place (the "[n]" indices are stripped),
# so callers that still need the original should pass a copy.
def find_existing_column_for(unmapped_xpath)
  # Remove any e.g. [2] which will be present on repeating sections
  unmapped_xpath.gsub!(/\[\d+\]/, '')
  wanted = labelled_xpath_components_from(unmapped_xpath)

  columns.detect do |column|
    # 'xml_cell' may be absent or blank; treat both as "no xml_cell settings"
    xml_cell = column['xml_cell'].presence || {}

    # Blank and missing values are equivalent (both mean "not set"), so they
    # are normalised to nil on both sides before comparing. Without this, a
    # relative path of '' never matches one of nil, and vice versa.
    column['column'] == wanted[:column_name] &&
      xml_cell['relative_path'].presence == wanted[:column_relative_path].presence &&
      xml_cell['attribute'].presence == wanted[:column_attribute].presence
  end
end
labelled_xpath_components_from(unmapped_xpath) click to toggle source

Returns a Hash containing labelled components for the given `unmapped_xpath`. For example, an `unmapped_xpath` of “Record/Demographics/Sex/@code” would result in:

{ column_attribute: 'code',
  column_name: 'Sex',
  column_relative_path: 'Record/Demographics' }
# File lib/ndr_import/xml/table.rb, line 113
# Splits `unmapped_xpath` on '/' and returns a Hash labelling its parts under
# the keys :column_attribute, :column_name and :column_relative_path.
def labelled_xpath_components_from(unmapped_xpath)
  parts     = unmapped_xpath.split('/')
  attribute = new_column_attribute_from(parts)
  name      = new_column_name_from(parts, attribute)
  path      = new_relative_path_from(parts, attribute)

  { column_attribute: attribute, column_name: name, column_relative_path: path }
end
mappable_xpaths_from(line) click to toggle source
# File lib/ndr_import/xml/table.rb, line 169
# Returns the xpaths, relative to `line`, of every populated node of `line`
# that has no child elements. A node with attributes contributes one
# "<xpath>/@<attribute>" entry per attribute; one without contributes its own
# xpath.
def mappable_xpaths_from(line)
  line.xpath('.//*[not(child::*)]').each_with_object([]) do |node, collected|
    next unless populated?(node)

    # Make the node's absolute path relative to `line`
    relative_xpath = node.path.sub("#{line.path}/", '')

    if node.attributes.any?
      collected.concat(node.attributes.keys.map { |name| "#{relative_xpath}/@#{name}" })
    else
      collected.push(relative_xpath)
    end
  end
end
new_column_attribute_from(xpath_components) click to toggle source
# File lib/ndr_import/xml/table.rb, line 124
# Returns the attribute name (without its leading '@') when the last xpath
# component is an attribute, e.g. ['Sex', '@code'] => 'code'. Returns nil when
# the last component is not an attribute or `xpath_components` is empty.
def new_column_attribute_from(xpath_components)
  final_component = xpath_components.last
  # Safe navigation covers an empty component list, where `last` is nil
  return nil unless final_component&.start_with?('@')

  final_component[1..]
end
new_column_name_from(xpath_components, column_attribute) click to toggle source
# File lib/ndr_import/xml/table.rb, line 128
# Returns the component naming the column: the second-to-last component when
# an attribute is present (the last one is then the attribute itself),
# otherwise the last component.
def new_column_name_from(xpath_components, column_attribute)
  name_index = column_attribute.present? ? -2 : -1
  xpath_components[name_index]
end
new_relative_path_from(xpath_components, column_attribute) click to toggle source

xpaths can be e.g. Record/Demographics/Sex/@code or Record/Demographics/Surname. `xpath_components` is an array of the xpath's components, for example: Record/Demographics/Sex/@code => ['Record', 'Demographics', 'Sex', '@code']

For the relative path, we want to return Record/Demographics. The upper_limit removes the “field name” (Sex or Surname here) and optionally the attribute (@code here) if present, from `xpath_components`. The resulting array is joined back together to form the relative path.

# File lib/ndr_import/xml/table.rb, line 142
# Returns the xpath components that precede the column name, joined with '/',
# or nil when there is only a single component (or none).
#
# The column name is always dropped from the end, together with the attribute
# component when `column_attribute` is present. If nothing is left the result
# is an empty string.
def new_relative_path_from(xpath_components, column_attribute)
  return nil unless xpath_components.count > 1

  trailing_components = column_attribute.present? ? 2 : 1
  leading_count       = xpath_components.count - trailing_components

  xpath_components.first(leading_count).join('/')
end
populated?(node) click to toggle source
# File lib/ndr_import/xml/table.rb, line 185
# True when `node` has no child elements, is not a comment, text or CDATA
# node, and carries at least one attribute or some non-whitespace content.
def populated?(node)
  return false unless node.element_children.empty?
  return false if node.is_a?(Nokogiri::XML::Comment) || node.text? || node.cdata?

  # Only a node with neither attributes nor content counts as unpopulated
  !(node.attributes.empty? && node.content.strip.blank?)
end
relative_path_from(column, colum_name) click to toggle source
# File lib/ndr_import/xml/table.rb, line 196
# Builds the xpath for a column from its 'xml_cell' settings, in the form
# "<relative_path>/<column name>/@<attribute>". The relative path and the
# attribute parts are each left out when blank.
def relative_path_from(column, colum_name)
  cell        = column['xml_cell']
  path_prefix = cell['relative_path'].presence
  attr_suffix = cell['attribute'].presence ? '@' + cell['attribute'] : nil

  qualified_name = path_prefix ? path_prefix + '/' + colum_name : colum_name
  attr_suffix ? qualified_name + '/' + attr_suffix : qualified_name
end
unmapped_xpaths(line) click to toggle source

This is not memoized by design: we want to re-calculate the unmapped xpaths after `@augmented_column_xpaths` has been augmented for each `line`.

# File lib/ndr_import/xml/table.rb, line 157
# Returns the mappable xpaths of `line` that are not covered by the augmented
# column xpaths (or, while those are unset, by the configured column xpaths).
# Recomputed on every call.
def unmapped_xpaths(line)
  candidate_xpaths = mappable_xpaths_from(line)
  known_xpaths     = @augmented_column_xpaths || column_xpaths

  candidate_xpaths - known_xpaths
end
validate_column_mappings(line) click to toggle source

Ensure every leaf is accounted for in the column mappings

# File lib/ndr_import/xml/table.rb, line 148
# Ensures every leaf of `line` is accounted for in the column mappings.
# Returns nil when nothing is unmapped; otherwise raises
# NdrImport::Xml::UnmappedXpathError listing the unmapped xpaths.
def validate_column_mappings(line)
  leftover_xpaths = unmapped_xpaths(line)
  raise(NdrImport::Xml::UnmappedXpathError, leftover_xpaths.to_sentence) if leftover_xpaths.any?
end
xml_line_from(line) click to toggle source
# File lib/ndr_import/xml/table.rb, line 89
# Returns one value per augmented column xpath, in xpath order: the inner text
# of the nodes matched in `line`, or an empty string when the xpath matches
# more than one node.
def xml_line_from(line)
  @augmented_column_xpaths.map do |column_xpath|
    # Evaluate each xpath once and reuse the result for both the count check
    # and the text extraction.
    matches = line.xpath(column_xpath)

    # Augmenting the column mappings should account for repeating sections/items
    # TODO: Is this needed now that we removed "duplicated" klass mappings?
    matches.count > 1 ? '' : matches.inner_text
  end
end