module DBX
Collection of database utility methods
rubocop:disable all
rubocop:disable all
Constants
- CONFIG_PATH
Public Instance Methods
column_types(src, sample_rows: config_sample_rows, csv_options: {})
click to toggle source
Read source as CSV and detect types based on `sample_rows` Types returns match with ActiveRecord column types. Types are memory cached by `src`.
@return [Hash<String, Symbol>] column name to type symbols
# File lib/dbx.rb, line 123 def column_types(src, sample_rows: config_sample_rows, csv_options: {}) headers = nil count = 0 csv_options[:headers] = false @types ||= {} types = @types[src] return types if types types = [] CSV.foreach(src, **csv_options) do |row| unless headers headers = row.map(&:downcase) next end headers.each_with_index do |header, i| next if types[i] == :string pattern_type = config['column_patterns'].detect{ |pat, _| header =~ /#{pat}/ } if pattern_type types[i] = pattern_type.last next end type = detect_type(row[i]) next if type.nil? if types[i] == :decimal && type == :integer # keep decimal elsif types[i] == :datetime && type == :date # keep datetime else # assign the new type types[i] = type end end # stop after max rows reached break if (count += 1) > sample_rows end # any remaining nil types are assigned as :string types.size.times{|i| types[i] ||= :string } @types[src] = Hash[headers.zip(types)] end
config()
click to toggle source
# File lib/dbx.rb, line 21 def config @config ||= config_from_yaml end
config_auto_index_pattern()
click to toggle source
# File lib/dbx.rb, line 37 def config_auto_index_pattern config['auto_index_pattern'] end
config_db()
click to toggle source
# File lib/dbx.rb, line 41 def config_db ENV['DATABASE_URL'] || config['db'] || raise('`db` not set as command line option or `dbx.yml`') end
config_from_yaml()
click to toggle source
# File lib/dbx.rb, line 25 def config_from_yaml if File.file?(CONFIG_PATH) YAML.load(IO.read(CONFIG_PATH)) else {} end end
config_sample_rows()
click to toggle source
# File lib/dbx.rb, line 33 def config_sample_rows config['sample_rows'] || 100 end
connection(db_url: config_db, &block)
click to toggle source
# File lib/dbx.rb, line 54 def connection(db_url: config_db, &block) # ENV['DATABASE_URL'] = db_url # @pool ||= ModelBase.establish_connection(db_url) @pool ||= begin ModelBase.establish_connection(db_url) ModelBase.logger = Logger.new(tty) end ModelBase.connection_pool.with_connection(&block) end
create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
click to toggle source
# File lib/dbx.rb, line 68 def create_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) name ||= parse_table_name(src) types = column_types(src, sample_rows: sample_rows, csv_options: csv_options) connection do |conn| conn.create_table name, force: force, id: false do |t| types.each do |column, type| t.send(type, column, nulls: true) end end end end
detect_type(value)
click to toggle source
Detect the column type given a value. May return nil if the value is blank.
# File lib/dbx.rb, line 169 def detect_type(value) if value.blank? nil elsif value =~ /^\d+\.\d+$/ :decimal elsif value =~ /^\d{1,10}$/ :integer elsif value =~ /^\h{8}-\h{4}-\h{4}-\h{4}-\h{12}$/ :uuid elsif value =~ /^\d{4}(\D)\d{2}\1\d{2}$/ && (Date.parse(value) rescue false) :date elsif (Time.parse(value) rescue false) :datetime else :string end end
import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {})
click to toggle source
TODO parse CSV options into Postgres
# File lib/dbx.rb, line 81 def import_table(src, name: nil, force: false, sample_rows: config_sample_rows, csv_options: {}) name ||= parse_table_name(src) connection do |conn| create_table(src, name: name, force: force, sample_rows: sample_rows, csv_options: csv_options) # TODO only postgres is support at the moment pg = conn.instance_variable_get(:@connection) types = column_types(src).keys.map{|m| %("#{m}")} pg_stmt = %{COPY "#{name}"(#{types.join(',')}) FROM STDIN CSV} conn.logger.debug(pg_stmt) pg.copy_data(pg_stmt) do first = true IO.foreach(src) do |line| if first first = false next end pg.put_copy_data(line) end end unless config_auto_index_pattern.blank? index_table(name, pattern: /#{config_auto_index_pattern}/) end end name end
index_table(table_name, pattern: nil)
click to toggle source
# File lib/dbx.rb, line 109 def index_table(table_name, pattern: nil) connection do |conn| conn.columns(table_name).each_with_index do |column, i| next unless column.name =~ pattern conn.add_index(table_name, [column.name], name: "idx_#{table_name}_#{i.to_s.rjust(2,'0')}") end end end
info(msg)
click to toggle source
# File lib/dbx.rb, line 50 def info(msg) tty.puts("\e[33m#{msg}\e[0m") end
parse_table_name(src)
click to toggle source
# File lib/dbx.rb, line 64 def parse_table_name(src) File.basename(src).sub(File.extname(src), '').downcase end
tty()
click to toggle source
TODO what about windows?!
# File lib/dbx.rb, line 46 def tty @tty ||= File.open('/dev/tty', 'a') end