class Cranium::Transformation::DuplicationIndex

Public Class Methods

[](*fields)
# File lib/cranium/transformation/duplication_index.rb, line 5
# Returns the duplication index registered for the given list of fields,
# creating and caching it the first time that list is requested.
#
# Indexes are kept in a hash stored on the class itself and keyed by the
# field list, so asking again with the same fields in the same order
# returns the very same object.
#
# @param fields [Array] names of the fields that identify a record
# @return [Object] the index cached for +fields+
# @raise [ArgumentError] when called without any fields
def self.[](*fields)
  if fields.empty?
    raise ArgumentError, "Cannot build duplication index for empty fieldset"
  end

  cache = (@instances ||= {})
  cache[fields] ||= new(*fields)
end
new(*fields)
# File lib/cranium/transformation/duplication_index.rb, line 13
# Prepares an index over the given fields with no fingerprints recorded yet.
#
# @param fields [Array] names of the fields whose values form a fingerprint
def initialize(*fields)
  @fingerprints = Set.new
  @fields = fields
end

Public Instance Methods

duplicate?(record)
# File lib/cranium/transformation/duplication_index.rb, line 20
# Reports whether a record with the same fingerprint has already been passed
# to this index, remembering the fingerprint when it is new.
#
# @param record [Object] the record to check; handed to take_fingerprint
# @return [Boolean] true if the fingerprint was already recorded, false if
#   this call recorded it for the first time
# @raise [StandardError] propagated from take_fingerprint when the record
#   lacks one of the indexed fields
def duplicate?(record)
  # Set#add? inserts the fingerprint and returns nil when it was already
  # present, so one set operation both checks and records it (the fingerprint
  # is hashed once instead of twice).
  !@fingerprints.add?(take_fingerprint(record))
end

Private Instance Methods

take_fingerprint(record)
# File lib/cranium/transformation/duplication_index.rb, line 35
# Builds the fingerprint of a record: the values of the indexed fields, in
# the order the fields were given to the index.
#
# Only +has_key?+ and +[]+ are called on the record.
#
# @param record [#has_key?, #[]] the record to fingerprint
# @return [Array] one value per indexed field
# @raise [StandardError] when the record has no entry for an indexed field
def take_fingerprint(record)
  @fields.map do |key|
    next record[key] if record.has_key?(key)

    raise StandardError, "Missing deduplication key from record: #{key}"
  end
end