class PgDumpAnonymize::Definition

Defines the anonymization rules and applies them, line by line, while parsing a pg_dump SQL file.

Public Class Methods

new(attribute_rules) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 6
# Stores the anonymization rules and starts with no active table.
#
# attribute_rules - the rules object; elsewhere in this class it is read
#                   with #keys, #[] and #dig, using table names as keys.
def initialize(attribute_rules)
  # Nothing is being tracked until a matching COPY statement is seen.
  @current_table = @positional_substitutions = nil
  @attribute_rules = attribute_rules
end

Public Instance Methods

process_line(line) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 12
# Processes one line of the dump and returns the line to emit.
#
# While no table is active, the line is only inspected for a matching COPY
# statement and is returned untouched. While a table is active, the line is
# either the end-of-data marker (which deactivates the table), a row that is
# kept as-is, a row that is dropped (returned as an empty string), or a row
# that is anonymized.
def process_line(line)
  unless @current_table
    process_copy_line(line)
    return line
  end

  if end_stdin?(line)
    clear_current_table
    line
  elsif skip?(line)
    line # row is kept exactly as it was read
  elsif delete?(line)
    ''
  else
    anonymize_line(line)
  end
end

Private Instance Methods

anonymize_line(line) click to toggle source

Assumes the line is a tab-delimited data line.

# File lib/pg_dump_anonymize/definition.rb, line 32
# Applies the active table's positional substitutions to one tab-delimited
# data row and returns the rewritten row.
#
# line - the raw row, with or without its trailing line terminator.
#
# The line terminator is removed before splitting and re-appended afterwards.
# Without this, the terminator stays attached to the last field, so replacing
# the last column would drop the newline and the callable would receive
# "value\n". Splitting with a negative limit keeps trailing empty fields, so
# a row ending in empty columns is not shortened.
#
# Each substitution is either a literal replacement or a Proc. A Proc is
# called with as many of (current value, row context) as its arity asks for.
# For a negative arity, Array#slice returns nil and the Proc is called with
# no arguments.
def anonymize_line(line)
  body = line.chomp
  terminator = line[body.length..-1]
  values = body.split("\t", -1)
  row_context = { row: row_to_hash(values) } # used to share state for a row

  @positional_substitutions.each do |index, val_def|
    replacement = if val_def.is_a?(Proc)
                    val_def.call(*[values[index], row_context].slice(0, val_def.arity))
                  else
                    val_def
                  end

    # Postgres represents nil/null as '\N' in SQL dumps
    values[index] = replacement.nil? ? '\N' : replacement
  end

  values.join("\t") + terminator
end
clear_current_table() click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 83
# Forgets the active table together with its column list and substitutions,
# so later lines are again inspected for a COPY statement.
def clear_current_table
  @current_table = @fields = @positional_substitutions = nil
end
delete?(row) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 97
# Tells whether the row should be dropped from the output.
#
# row - the row, passed on to row_to_hash before the rule is called.
#
# Returns true only when the active table has a :_delete_if rule and that
# rule returns a truthy value for the row hash; false otherwise.
def delete?(row)
  predicate = @attribute_rules.dig(@current_table, :_delete_if)
  return false unless predicate

  predicate.call(row_to_hash(row)) ? true : false
end
end_stdin?(line) click to toggle source

The COPY data read from stdin is terminated by a line containing only '\.' (a backslash followed by a period).

# File lib/pg_dump_anonymize/definition.rb, line 75
# Tells whether the line is the end-of-data marker of a COPY ... FROM stdin
# block, i.e. a line consisting solely of a backslash followed by a period.
#
# line - the raw line, with or without a trailing newline.
#
# Returns the match position (truthy) or nil.
#
# The period must be escaped: an unescaped '.' matches any character, so a
# single-column row containing '\N' (NULL) would wrongly end the data block.
def end_stdin?(line)
  line =~ /^\\\.$/
end
find_positions(fields_str, rules) click to toggle source

Finds the column position of each attribute to be replaced. Returns an array of arrays; each inner array is [<field_index>, <anonymous_value>].

# File lib/pg_dump_anonymize/definition.rb, line 61
# Records the column names of the COPY statement in @fields and pairs each
# rule with the index of the column it targets.
#
# fields_str - the column list as written in the COPY statement; double
#              quotes are removed and the list is split on ', '.
# rules      - enumerable of [field_name, replacement] pairs.
#
# Returns an array of [column_index, replacement] pairs. Rules whose name is
# not among the columns are left out.
def find_positions(fields_str, rules)
  @fields = fields_str.delete('"').split(', ')

  rules.each_with_object([]) do |(target_field, val), positions|
    position = @fields.index(target_field.to_s)
    positions << [position, val] if position
  end
end
line_regex() click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 70
# Memoized regexp that recognises a COPY statement for any configured table
# in the public schema. It captures the table name as :table_name and the
# raw column list as :field_defs.
#
# Table names are escaped before interpolation so that characters with a
# regexp meaning are matched literally. Without escaping, a name such as
# log$2020 could never match, because '$' would act as an anchor.
def line_regex
  @line_regex ||= begin
    names = table_names.map { |name| Regexp.escape(name.to_s) }.join('|')
    /^COPY public\.(?<table_name>#{names}) \((?<field_defs>.*)\) FROM stdin;$/
  end
end
process_copy_line(line) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 48
# Checks whether the line is a COPY statement for a configured table and, if
# so, makes that table the active one and computes its substitutions.
#
# Returns nil when the line does not match; otherwise the substitutions
# produced by find_positions.
def process_copy_line(line)
  copy_header = line_regex.match(line)
  return if copy_header.nil?

  # The table is activated before its rules are resolved against the columns.
  @current_table = copy_header[:table_name].to_sym
  @positional_substitutions = find_positions(copy_header[:field_defs], @attribute_rules[@current_table])
end
row_to_hash(row) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 105
# Builds a { column_name => value } hash for a row of the active table.
#
# row - either the raw tab-delimited line (String) or an already split list
#       of values.
#
# Returns nil when no column list is known. Raises a RuntimeError carrying
# the table, fields and values when the hash cannot be built.
#
# A String row has its line terminator removed before splitting, so the last
# value does not carry a trailing newline into the rule callbacks. It is
# split with a negative limit so trailing empty columns become '' rather
# than nil.
def row_to_hash(row)
  return nil unless @fields

  values = row.is_a?(String) ? row.chomp.split("\t", -1) : row

  begin
    @fields.zip(values).to_h
  rescue StandardError => e
    raise "#{e.message}, row_to_hash error encountered: current_table: #{@current_table} -- fields(#{@fields&.length}): #{@fields} -- values(#{values&.length}): #{values}"
  end
end
skip?(row) click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 89
# Tells whether the row should be emitted unchanged.
#
# row - the row, passed on to row_to_hash before the rule is called.
#
# Returns true only when the active table has a :_skip_if rule and that rule
# returns a truthy value for the row hash; false otherwise.
def skip?(row)
  predicate = @attribute_rules.dig(@current_table, :_skip_if)
  return false unless predicate

  predicate.call(row_to_hash(row)) ? true : false
end
table_names() click to toggle source
# File lib/pg_dump_anonymize/definition.rb, line 79
# Lists the tables that have rules, i.e. the keys of the rules hash, in
# insertion order.
def table_names
  @attribute_rules.each_key.to_a
end