module ActiveSanitization

Constants

VERSION

Attributes

configuration[RW]

Public Class Methods

clean_up_files(dump_file, compressed_dump_file) click to toggle source
# File lib/active_sanitization.rb, line 200
# Removes the plain dump file and its compressed counterpart from disk.
#
# Every path is announced through +log+ before the removal attempt; a path
# that does not exist is logged and then skipped.
#
# dump_file            - path of the uncompressed dump
# compressed_dump_file - path of the gzipped dump
#
# Returns the result of the last removal attempt (nil when the compressed
# file was not present).
def self.clean_up_files(dump_file, compressed_dump_file)
  remove = lambda do |path|
    self.log("Deleting #{path}")
    File.delete(path) if File.exist?(path)
  end

  remove.call(dump_file)
  remove.call(compressed_dump_file)
end
clean_up_temp_db(temp_db) click to toggle source
# File lib/active_sanitization.rb, line 164
# Drops the temporary database through the configured ActiveRecord
# connection, logging the database name first.
#
# temp_db - name of the database to drop (interpolated into the SQL as-is)
#
# Returns whatever the connection's +execute+ returns.
def self.clean_up_temp_db(temp_db)
  self.log("Dropping #{temp_db}")

  connection = self.configuration.active_record_connection
  drop_statement = "DROP DATABASE #{temp_db};"
  connection.execute(drop_statement)
end
configure() { |configuration| ... } click to toggle source
# File lib/active_sanitization.rb, line 12
# Yields the module-wide configuration object to the given block, creating
# a fresh Configuration the first time it is needed.
#
# Returns the value of the block.
def self.configure
  self.configuration = Configuration.new unless self.configuration
  yield(configuration)
end
create_files() click to toggle source
# File lib/active_sanitization.rb, line 142
# Creates (or truncates) the empty dump file and its ".gz" sibling inside
# "<configuration.root>/tmp".
#
# The files are opened with the block form of File.open so the handles are
# closed immediately instead of lingering until garbage collection.
# The "tmp" directory itself is expected to exist already.
#
# Returns a two-element array: [dump_file, compressed_dump_file].
def self.create_files
  dump_file = File.join(self.configuration.root, "tmp", "data.dump")
  compressed_dump_file = "#{dump_file}.gz"

  [dump_file, compressed_dump_file].each do |path|
    # "w+" creates the file or truncates an existing one; the empty block
    # only exists so the handle is closed right away.
    File.open(path, "w+") {}
  end

  [dump_file, compressed_dump_file]
end
duplicate_database() click to toggle source
# File lib/active_sanitization.rb, line 95
# Builds a scratch copy of the configured database named "<database>_copy".
#
# Steps:
#   1. Drop any previous copy and create an empty database.
#   2. Pipe a mysqldump of the tables_to_sanitize (schema and data) into it.
#   3. Pipe a --no-data mysqldump of the tables_to_truncate (schema only)
#      into it.
#   4. Point TempDatabaseConnection at the copy.
#
# The commands are logged with the password masked, and failure messages
# name only the database, so credentials never reach the logs.
#
# Returns [temp_db, temp_db_connection, temp_db_config].
# Raises RuntimeError when either dump/load pipeline exits unsuccessfully.
def self.duplicate_database
  db_config = self.configuration.db_config
  connection = self.configuration.active_record_connection
  source_db = db_config['database']
  temp_db = "#{source_db}_copy"

  self.log("Deleting temp DB if exists")
  connection.execute("DROP DATABASE IF EXISTS #{temp_db};")
  self.log("Creating temp DB")
  connection.execute("CREATE DATABASE #{temp_db}")
  self.log("Copying #{self.configuration.env} DB to temp DB")

  # Renders the `mysqldump | mysql` pipeline; the password is a parameter so
  # the same template serves both the executed and the logged command.
  copy_command = lambda do |password, dump_options, tables|
    login = "-h #{db_config['host']} -u #{db_config['username']} --password=#{password}"
    "mysqldump #{login} #{dump_options}#{source_db} #{tables.join(' ')} | mysql #{login} -D #{temp_db}"
  end

  # Logs the masked command, runs the real one and aborts on failure.
  run_copy = lambda do |dump_options, tables|
    self.log(copy_command.call('XXXXXXXXX', dump_options, tables))
    unless system(copy_command.call(db_config['password'], dump_options, tables))
      raise "Failed to load DB #{source_db} into temp DB #{temp_db}."
    end
    self.log("Temp DB created and populated")
  end

  run_copy.call('', self.configuration.tables_to_sanitize.keys)
  run_copy.call('--no-data ', self.configuration.tables_to_truncate.keys)

  # Work on a copy so the application's own db_config keeps pointing at the
  # source database.
  temp_db_config = db_config.dup
  temp_db_config['database'] = temp_db
  TempDatabaseConnection.establish_connection(temp_db_config)
  temp_db_connection = TempDatabaseConnection.connection

  [temp_db, temp_db_connection, temp_db_config]
end
export_temp_db_to_file(dump_file, temp_db_config, temp_db) click to toggle source
# File lib/active_sanitization.rb, line 207
# Appends a mysqldump of +temp_db+ to +dump_file+.
#
# dump_file      - path the dump is appended to (shell ">>")
# temp_db_config - hash providing the 'host', 'username' and 'password' keys
# temp_db        - name of the database to dump
#
# Returns the result of logging "Dump created" on success; on failure the
# problem is only logged and nil is returned.
def self.export_temp_db_to_file(dump_file, temp_db_config, temp_db)
  self.log("Dumping temp DB to #{dump_file}")

  login = "-h #{temp_db_config['host']} -u #{temp_db_config['username']} --password=#{temp_db_config['password']}"
  system("mysqldump #{login} #{temp_db} >> '#{dump_file}'")
  return self.log("Dump created") if $?.exitstatus == 0

  self.log("Failed to create dump")
  nil
end
get_s3_bucket() click to toggle source
# File lib/active_sanitization.rb, line 179
# Returns the bucket named by configuration.s3_bucket, looked up through an
# Aws::S3::Resource built around the client from get_s3_client.
def self.get_s3_bucket
  Aws::S3::Resource.new(client: get_s3_client).bucket(self.configuration.s3_bucket)
end
get_s3_client() click to toggle source
# File lib/active_sanitization.rb, line 174
# Builds an Aws::S3::Client from the configured access key pair and bucket
# region.
def self.get_s3_client
  settings = self.configuration
  credentials = Aws::Credentials.new(settings.aws_access_key_id, settings.aws_secret_access_key)
  Aws::S3::Client.new(region: settings.s3_bucket_region, credentials: credentials)
end
gzip(dump_file) click to toggle source
# File lib/active_sanitization.rb, line 169
# Compresses +dump_file+ by running the external gzip program on it.
#
# The path is handed to gzip as a separate argument rather than spliced into
# a shell string, so paths containing quotes, spaces or other shell
# metacharacters are passed through intact. "--" stops option parsing in
# case the path starts with a dash.
#
# dump_file - path of the file to compress
#
# Returns the result of Kernel#system: true on success, false on a non-zero
# exit, nil when gzip could not be executed.
def self.gzip(dump_file)
  self.log("Gzipping #{dump_file}")
  system("gzip", "--", dump_file.to_s)
end
hash_diff(hash1, hash2) click to toggle source

Returns a hash that represents the difference between two hashes.

hash_diff({1 => 2}, {1 => 2})         # => {}
hash_diff({1 => 2}, {1 => 3})         # => {1 => 2}
hash_diff({}, {1 => 2})               # => {1 => 2}
hash_diff({1 => 2, 3 => 4}, {1 => 2}) # => {3 => 4}
# File lib/active_sanitization.rb, line 47
# Computes the difference between two hashes.
#
# The result holds every pair of +hash1+ whose value differs from (or is
# missing in) +hash2+, plus every pair of +hash2+ whose key does not occur
# in +hash1+. Neither argument is modified.
#
#   hash_diff({1 => 2}, {1 => 2})         # => {}
#   hash_diff({1 => 2}, {1 => 3})         # => {1 => 2}
#   hash_diff({}, {1 => 2})               # => {1 => 2}
#   hash_diff({1 => 2, 3 => 4}, {1 => 2}) # => {3 => 4}
def self.hash_diff(hash1, hash2)
  changed_or_missing = hash1.reject { |key, value| hash2[key] == value }
  only_in_second = hash2.reject { |key, _value| hash1.has_key?(key) }

  changed_or_missing.merge(only_in_second)
end
import_data(env = nil, timestamp = nil) click to toggle source
# File lib/active_sanitization.rb, line 256
# Replaces the local database with a sanitized snapshot stored in S3.
#
# Snapshots are expected under "<app_name>/<env>/mysql/<timestamp>/data.dump.gz".
# Before anything destructive happens the current local database is dumped
# to "<root>/tmp/local_data.dump" as a safety copy.
#
# env       - environment whose snapshot should be imported
#             (defaults to "production")
# timestamp - snapshot timestamp directory; when nil the greatest timestamp
#             found under the prefix is used
#
# Returns nil early (after logging) when no snapshot can be found under the
# prefix.
# Raises RuntimeError when the local safety dump or the import itself fails.
def self.import_data(env = nil, timestamp = nil)
  env ||= "production"
  prefix = "#{self.configuration.app_name}/#{env}/mysql"
  db_config = self.configuration.db_config

  bucket = get_s3_bucket
  if timestamp.nil?
    # The prefix is escaped and anchored so regex metacharacters in the app
    # name cannot distort the match. Keys that merely share the prefix
    # (e.g. "<prefix>_old/...") yield nil and are dropped before taking the
    # maximum; comparing nil with a String would raise.
    snapshot_pattern = %r{\A#{Regexp.escape(prefix)}/(.*)/}
    timestamp = bucket.objects(prefix: prefix).map { |object| object.key[snapshot_pattern, 1] }.compact.max
  end

  # Still nil means the listing contained no snapshot at all. An explicitly
  # passed timestamp is not verified here; a wrong one surfaces as an error
  # from the download below.
  if timestamp.nil?
    self.log("No mysql snapshot found under #{prefix}")
    return
  end

  self.log('WARNING: this rake task will dump your MySQL DB to a file, then wipe your DB before importing a snapshot')
  tmp_dir = File.join(self.configuration.root, "tmp")
  local_dump_file = "#{tmp_dir}/local_data.dump"

  # Make copy of local DB just in case something goes wrong
  unless system("mysqldump -h #{db_config['host']} -u #{db_config['username']} --password=#{db_config['password']} #{db_config['database']} > '#{local_dump_file}'")
    raise "Failed to create a local DB dump. If a previous local dump exists, please delete it and try again."
  end
  self.log("Local DB dump stored in #{local_dump_file}")

  compressed_dump_file = "#{tmp_dir}/data.dump.gz"

  name = "#{prefix}/#{timestamp}/data.dump.gz"
  self.log("Downloading dump from bucket: #{self.configuration.s3_bucket}, path: #{name}")
  get_s3_client.get_object({ bucket: self.configuration.s3_bucket, key: name }, target: compressed_dump_file)

  # reset db
  self.log("Recreating your local DB")
  Rake::Task["db:drop"].invoke
  Rake::Task["db:create"].invoke

  # Import data
  # NOTE(review): the import connects as "root" without host or password,
  # unlike the safety dump above which uses db_config - confirm this is
  # intended for local setups.
  self.log("Unzipping and importing data...")
  import_command = "gunzip < '#{compressed_dump_file}' | mysql -u root #{db_config['database']}"
  self.log(import_command)
  unless system(import_command)
    # Only the database name is reported; the full db_config would put the
    # password into the error message.
    raise "Could not load #{compressed_dump_file} into DB #{db_config['database']}"
  end
  File.delete(compressed_dump_file) if File.exist?(compressed_dump_file)

  self.log('-- DONE --')
end
is_dev_or_integration_env?() click to toggle source
# File lib/active_sanitization.rb, line 218
# True when the configured environment is 'development' or 'integration'.
def self.is_dev_or_integration_env?
  %w[development integration].include?(self.configuration.env)
end
log(output) click to toggle source
# File lib/active_sanitization.rb, line 62
# Sends +output+ to every configured logger at info level.
# Nothing is logged when the configured environment is 'test'.
#
# Returns the loggers collection, or nil in the 'test' environment.
def self.log(output)
  return if self.configuration.env == 'test'

  self.configuration.loggers.each { |logger| logger.info(output) }
end
pre_sanitization_checks() click to toggle source
# File lib/active_sanitization.rb, line 68
# Compares the live schema with the sanitization config.
#
# Every table reported by the configured connection (minus tables_to_ignore)
# is mapped to its sorted column names and compared, via hash_diff, with the
# sorted columns declared in tables_to_sanitize and tables_to_truncate.
#
# Returns { pass: true } when both sides agree; otherwise
# { pass: false, error: "<message>" }, where the message lists, per
# differing table, the columns left after removing the configured ones.
def self.pre_sanitization_checks
  connection = self.configuration.active_record_connection
  ignored_tables = self.configuration.tables_to_ignore

  db_tables = connection.tables.each_with_object({}) do |table_name, schema|
    next if ignored_tables.include?(table_name)

    schema[table_name] = connection.columns(table_name).map(&:name).sort
  end

  # diff will only work correctly if the columns are sorted the same
  configured_tables = self.configuration.tables_to_sanitize.merge(self.configuration.tables_to_truncate)
  configured_sorted = {}
  configured_tables.each_pair { |table_name, columns| configured_sorted[table_name] = columns.sort }

  table_difference = hash_diff(db_tables, configured_sorted)
  return { pass: true } if table_difference.empty?

  column_difference = {}
  table_difference.each_pair do |table_name, table_columns|
    column_difference[table_name] = table_columns - configured_tables[table_name].to_a
  end

  {
    pass: false,
    error: "The following tables or columns have been found in the #{self.configuration.env} DB but are not known to this script (#{column_difference}).\n Please update the active_sanitization config!"
  }
end
sanitize_and_export_data() click to toggle source
# File lib/active_sanitization.rb, line 222
# Runs the whole export pipeline: schema check, dump, gzip and upload.
#
# When pre_sanitization_checks fails, its error is logged and nothing else
# happens. Otherwise:
#   * in a development/integration environment the configured database is
#     dumped directly;
#   * in any other environment the database is duplicated, the copy is
#     sanitized and dumped, and the copy is dropped again - also when
#     sanitizing or dumping raises, so no unsanitized copy is left behind;
#   * the dump is gzipped and uploaded when the S3 bucket and both AWS keys
#     are configured; without them the local files are removed instead.
#
# Returns the result of the final log call.
def self.sanitize_and_export_data
  checks = self.pre_sanitization_checks
  return self.log(checks[:error]) unless checks[:pass]

  # Creating and immediately removing the files leaves no stale dump behind
  # (the export appends to the dump file).
  dump_file, compressed_dump_file = self.create_files
  self.clean_up_files(dump_file, compressed_dump_file)

  # If in dev or integration env we don't need to sanitize the DB so we should
  # just dump it to a file and upload
  if self.is_dev_or_integration_env?
    self.export_temp_db_to_file(dump_file, self.configuration.db_config, self.configuration.db_config["database"])
  else
    temp_db, temp_db_connection, temp_db_config = self.duplicate_database

    begin
      self.sanitize_tables(temp_db_connection)
      self.export_temp_db_to_file(dump_file, temp_db_config, temp_db)
    ensure
      # Always drop the copy: it holds unsanitized data until
      # sanitize_tables has finished.
      self.clean_up_temp_db(temp_db)
    end
  end

  self.gzip(dump_file)

  if self.configuration.s3_bucket && self.configuration.aws_access_key_id && self.configuration.aws_secret_access_key
    self.upload(compressed_dump_file)
  else
    self.clean_up_files(dump_file, compressed_dump_file)
  end

  self.log("-- DONE --")
end
sanitize_table(table, temp_db_connection) click to toggle source
# File lib/active_sanitization.rb, line 127
# Overwrites sensitive columns of +table+ in the temp database.
#
# For every column named in configuration.sanitization_columns that exists
# in the table, each distinct value is replaced by a random sample from the
# configured replacement list; all rows sharing a value receive the same
# replacement. Afterwards configuration.custom_sanitization is given a
# chance to run "sanitize_<table>" if it responds to it.
#
# Both the replacement and the matched value are quoted by the connection,
# so values containing apostrophes or backslashes cannot break the
# statement.
#
# table              - name of the table to sanitize
# temp_db_connection - connection to the temp database; must respond to
#                      select_values, execute and quote
#
# Returns the custom hook's result, or nil when there is no hook.
def self.sanitize_table(table, temp_db_connection)
  table_columns = temp_db_connection.select_values("DESCRIBE #{table};")

  self.configuration.sanitization_columns.each do |column, replacements|
    next unless table_columns.include?(column)

    distinct_values = temp_db_connection.execute("SELECT DISTINCT(#{column}) FROM #{table};").map(&:first)
    distinct_values.each do |value|
      replacement = temp_db_connection.quote(replacements.sample)
      current = temp_db_connection.quote(value)
      temp_db_connection.execute("UPDATE #{table} SET #{column}=#{replacement} WHERE #{column}=#{current};")
    end
  end

  # Run any custom sanitization for the table
  custom_sanitization = self.configuration.custom_sanitization
  hook = "sanitize_#{table}"
  custom_sanitization.public_send(hook, temp_db_connection) if custom_sanitization.respond_to?(hook)
end
sanitize_tables(temp_db_connection) click to toggle source
# File lib/active_sanitization.rb, line 150
# Prepares the temp database for export in two passes: first every table in
# configuration.tables_to_truncate is emptied, then every table in
# configuration.tables_to_sanitize is passed to sanitize_table.
#
# temp_db_connection - connection to the temp database
#
# Returns the array of sanitized table names.
def self.sanitize_tables(temp_db_connection)
  self.log("Processing TABLES_TO_TRUNCATE...")
  self.configuration.tables_to_truncate.keys.each do |table_name|
    self.log("Truncating #{table_name}")
    temp_db_connection.execute("TRUNCATE #{table_name};")
  end

  self.log("Processing TABLES_TO_SANITIZE...")
  tables = self.configuration.tables_to_sanitize.keys
  tables.each do |table_name|
    self.log("Sanitizing #{table_name}")
    self.sanitize_table(table_name, temp_db_connection)
  end
end
upload(compressed_dump_file) click to toggle source
# File lib/active_sanitization.rb, line 184
# Uploads the compressed dump to the configured S3 bucket and removes the
# local file afterwards.
#
# The object key is
# "<app_name>/<env>/mysql/<YYYYmmddHHMMSS>/<basename of the file>".
# The file is opened in block form so its handle is closed even when the
# upload raises; in that case the local file is kept.
#
# compressed_dump_file - path of the gzipped dump to upload
#
# Returns the S3 object the file was written to.
def self.upload(compressed_dump_file)
  timestamp = Time.now.strftime('%Y%m%d%H%M%S')
  name = "#{self.configuration.app_name}/#{self.configuration.env}/mysql/#{timestamp}/#{File.basename(compressed_dump_file)}"
  self.log("Uploading to bucket: #{self.configuration.s3_bucket}, path: #{name}")

  obj = File.open(compressed_dump_file, 'r') do |file|
    get_s3_bucket.object(name).tap { |remote| remote.put(body: file) }
  end

  File.unlink(compressed_dump_file)

  obj
end