class DataAnonymizer::Sanitize
Constants
- ALPHABETS
- NUMBERS
Public Instance Methods
anonymize_common_attributes(model_names, sync_attributes)
click to toggle source
Example: model_names = ["Person", "CensusMember", "CuramUser"]; sync_attributes = ["first_name", "last_name", "ssn"]
# File lib/data_anonymizer.rb, line 17
#
# Anonymizes the given attributes on every record of the given models, using
# one substitution cipher per attribute so that the same original value maps
# to the same anonymized value in every model.
#
# Progress and per-record failures are reported on stdout; a failure on one
# record never aborts the run.
#
# @param model_names [Array<String, Symbol>] names of the models to process
# @param sync_attributes [Array<String, Symbol>] attribute names to anonymize
# @param max_attempts [Integer] upper bound on update attempts for a single
#   record/attribute when a Mongoid validation error is raised
# @return [nil]
def anonymize_common_attributes(model_names, sync_attributes, max_attempts: 5)
  return if model_names.blank? || sync_attributes.blank?

  # Kept as class variables: no_ssn_collision? reads @@models_to_anonymize.
  @@models_to_anonymize = process_model_names(model_names)
  @@attributes_to_sync = process_sync_attributes(sync_attributes)

  puts 'Data Anonymization Started...'
  puts '*' * 85

  # Update attributes that need to be in sync across models.
  @@attributes_to_sync.each do |attribute|
    puts "Anonymizing attribute: #{attribute} across #{@@models_to_anonymize}"
    cipher_hash = build_substitution_cipher_hash_for(attribute)

    @@models_to_anonymize.each do |model|
      puts " - Processing #{model} Model"

      model.constantize.each do |instance|
        attempts = 0
        begin
          attempts += 1
          # The value is re-read on every attempt, exactly as before.
          current_value = instance.send(attribute)
          if current_value.present?
            saved = instance.update_attributes(attribute => encrypt_characters(cipher_hash, current_value))
            # The non-bang update_attributes signals a validation failure by
            # returning false rather than raising, so report it explicitly.
            unless saved
              puts " - Record not saved while updating [#{attribute}] for [#{instance.class}, ID: #{instance.id}] (Error Message: #{instance.errors.full_messages.join(', ')})"
            end
          end
        rescue Mongoid::Errors::Validations => e
          # Bounded retry: a validation error that never clears must not hang the run.
          if attempts < max_attempts
            puts " - Validation Error while updating [#{attribute}] for [#{instance.class}, ID: #{instance.id}]. Retrying with a new shuffled value...(Error Message: #{e.message})"
            retry
          end
          puts " - Giving up on [#{attribute}] for [#{instance.class}, ID: #{instance.id}] after #{attempts} attempts (Error Message: #{e.message})"
        rescue StandardError => e
          # StandardError (not Exception) so Interrupt/SystemExit still stop the process.
          puts " - Failed to anonymize record [#{attribute}] for [#{instance.class}, ID: #{instance.id}] (Error Message: #{e.message})"
        end
      end
    end
  end

  puts '*' * 85
  puts 'Data Anonymization Complete!!!'
end
Private Instance Methods
build_substitution_cipher_hash_for(attribute)
click to toggle source
# File lib/data_anonymizer.rb, line 61
#
# Builds a one-to-one substitution table for the given attribute.
#
# For :ssn the table maps each entry of NUMBERS to an entry of a shuffled
# copy of NUMBERS, reshuffling until no_ssn_collision? accepts the shuffle.
# For any other attribute the table maps ALPHABETS onto a shuffled copy of
# ALPHABETS.
#
# @param attribute [String, Symbol] attribute name (compared via to_sym)
# @return [Hash] original character => substitute character
def build_substitution_cipher_hash_for(attribute)
  unless attribute.to_sym == :ssn
    return ALPHABETS.zip(ALPHABETS.shuffle).to_h
  end

  # Keep shuffling until the candidate passes the SSN collision check.
  candidate = NUMBERS.shuffle
  candidate = NUMBERS.shuffle until no_ssn_collision?(candidate)
  NUMBERS.zip(candidate).to_h
end
encrypt_characters(cipher_hash, attribute_value)
click to toggle source
# File lib/data_anonymizer.rb, line 74
#
# Applies a substitution cipher to the alphanumeric characters of a value.
#
# Non-alphanumeric characters are removed and the remainder is lower-cased
# before lookup. A character with no cipher entry (or a blank one) is passed
# through unchanged.
#
# The input is never modified: the previous in-place gsub! rewrote the
# caller's string (for example a live model attribute) and raised on frozen
# strings.
#
# @param cipher_hash [Hash{String => String}] character substitution table
# @param attribute_value [#to_s] value to encrypt; nil yields ""
# @return [String] a new string holding the substituted characters
def encrypt_characters(cipher_hash, attribute_value)
  # Non-destructive gsub: work on a copy, leave the caller's object alone.
  normalized = attribute_value.to_s.gsub(/[^0-9A-Za-z]/, '').downcase

  normalized.each_char.map do |char|
    substitute = cipher_hash[char]
    missing = substitute.nil? || substitute.to_s.strip.empty?
    missing ? char : substitute
  end.join
end
no_ssn_collision?(shuffled_chars)
click to toggle source
# File lib/data_anonymizer.rb, line 84
#
# Reports whether the shuffled characters are free of SSN collisions, i.e.
# whether none of the models named in @@models_to_anonymize returns any
# record for where(ssn: shuffled_chars).
#
# NOTE(review): the argument is passed to the query as-is; the caller in
# this file hands over the whole shuffled array, so how the match is
# performed depends on the ODM's handling of array values - confirm.
#
# @param shuffled_chars [Array<String>] candidate shuffled characters
# @return [Boolean] true when no model reports a match
def no_ssn_collision?(shuffled_chars)
  @@models_to_anonymize.none? do |model_name|
    model_name.constantize.where(ssn: shuffled_chars).present?
  end
end
process_model_names(model_names)
click to toggle source
# File lib/data_anonymizer.rb, line 53
#
# Normalizes the requested model names into a list of unique strings.
#
# Accepts an array or a single name. Duplicates are dropped (after string
# conversion, so "Person" and :Person count as one) because processing a
# model twice would run its values through the same cipher a second time.
#
# @param model_names [Array<#to_s>, #to_s, nil] model name(s)
# @return [Array<String>] unique model names, in first-seen order
def process_model_names(model_names)
  Array(model_names).map(&:to_s).uniq
end
process_sync_attributes(sync_attributes)
click to toggle source
# File lib/data_anonymizer.rb, line 57
#
# Normalizes the attributes to anonymize into an array.
#
# An array argument is returned as the very same object; a single attribute
# name is wrapped in a one-element array and nil becomes an empty array, so
# the result can always be iterated.
#
# @param sync_attributes [Array<String, Symbol>, String, Symbol, nil]
# @return [Array] the attribute names
def process_sync_attributes(sync_attributes)
  Array(sync_attributes)
end