module DBX::Differ

Public Instance Methods

create_diff_stats(diff_table, force: false) click to toggle source
# File lib/dbx/differ.rb, line 57
    def create_diff_stats(diff_table, force: false)
      DBX.connection do |conn|
        diff_stats = "#{diff_table}_stats"
        conn.execute("DROP TABLE IF EXISTS #{diff_stats}") if force
        selects = conn.columns(diff_table).map do |column|
          header, type = column.name, column.type
          col = header[/(.*)_diff$/, 1]
          next unless col

          if column.sql_type == 'interval'
            %{SUM(#{header}) AS #{col}_sum}
          else
            case type
            when :decimal, :integer, :date, :datetime
              %{SUM(#{header}) AS #{col}_sum}
            else
              %{COUNT(#{header}) AS #{col}_count}
            end
          end
        end.compact.join(",\n")
        conn.execute(<<-SQL)
          CREATE TABLE #{diff_stats} AS
          SELECT
            #{selects}
          FROM #{diff_table}
        SQL
      end
    end
diff(table_a:, table_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false) click to toggle source

Compare `src_a` with `src_b`. Numeric types will be diffed by subtracting the values. Dates will contain difference by day. Datetime will contain difference by seconds. Everything else can only return a boolean true/false that it is different.

@param [String] table A Should be the initial state table. @param [String] table B Should be newer than table A, but doesn't have to be. @param [Array<String>] using is the join criteria between the 2 tables. @param [Array<String>] exclude_columns are excluded from the diff comparison.

# File lib/dbx/differ.rb, line 16
    def diff(table_a:, table_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false)
      table_diff = "diff_#{table_a}_#{table_b}"
      exclude_columns ||= []
      using = using.map(&:downcase)
      DBX.info("Creating diff table #{table_diff}")
      DBX.connection do |conn|
        conn.execute("DROP TABLE IF EXISTS #{table_diff}") if force
        conn.execute(<<-SQL)
        CREATE TABLE #{table_diff} AS
        SELECT
          #{using.join(', ')},
          #{select_columns(table_a, exclude_columns: using + exclude_columns, no_a_b: no_a_b)}
        FROM #{table_a} AS a
        FULL OUTER JOIN #{table_b} b USING (#{using.join(',')})
        WHERE
          #{where_columns(table_a, exclude_columns: using + exclude_columns)}
        SQL
        DBX.info(conn.exec_query(<<-SQL).as_json)
        SELECT
          (SELECT COUNT(*) FROM #{table_a}) count_table_a,
          (SELECT COUNT(*) FROM #{table_b}) count_table_b,
          (SELECT COUNT(*) FROM #{table_diff}) diffs
        SQL
      end
      DBX.info("Creating diff stats: #{table_diff}")
      create_diff_stats(table_diff, force: force)

      DBX.info("Diff complete. Results details in:   #{table_diff}")
    end
import_and_diff(src_a:, src_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 46
def import_and_diff(src_a:, src_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false)
  DBX.info("Importing #{src_a}")
  table_a = DBX.import_table(src_a, force: force)


  DBX.info("Importing #{src_b}")
  table_b = DBX.import_table(src_b, force: force)

  diff(table_a: table_a, table_b: table_b, force: force, using: using, exclude_columns: exclude_columns, no_a_b: no_a_b)
end
select_boolean(column, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 145
def select_boolean(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}NULLIF(#{a} <> #{b}, FALSE) AS #{column}_diff)
end
select_columns(table, exclude_columns: nil, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 86
def select_columns(table, exclude_columns: nil, no_a_b: false)
  exclude_columns ||= []
  DBX.connection do |conn|
    conn.columns(table).map do |column|
      header, type = column.name, column.type
      next if exclude_columns.include?(header)
      case type
      when :decimal, :integer
        select_difference(header, no_a_b: no_a_b)
      when :date
        select_difference_as_int(header, no_a_b: no_a_b)
      when :datetime
        select_difference_as_interval(header, no_a_b: no_a_b)
      else
        select_boolean(header, no_a_b: no_a_b)
      end
    end.compact.join(',')
  end
end
select_difference(column, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 117
def select_difference(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL WHEN #{a} IS NULL THEN #{b} WHEN #{b} IS NULL THEN #{a} ELSE NULLIF(#{b} - #{a}, 0) END) AS #{column}_diff)
end
select_difference_as_int(column, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 124
def select_difference_as_int(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL::bigint WHEN #{a} IS NULL THEN 1 WHEN #{b} IS NULL THEN -1 ELSE (#{b} - #{a}) END) AS #{column}_diff)
end
select_difference_as_interval(column, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 131
def select_difference_as_interval(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL::interval WHEN #{a} IS NULL THEN '1 day'::interval WHEN #{b} IS NULL THEN '-1 day'::interval ELSE (#{b} - #{a})::interval END) AS #{column}_diff)
end
select_difference_as_text(column, no_a_b: false) click to toggle source
# File lib/dbx/differ.rb, line 138
def select_difference_as_text(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL WHEN #{a} IS NULL THEN #{b}::text WHEN #{b} IS NULL THEN #{a}::text ELSE (#{b} - #{a})::text END) AS #{column}_diff)
end
where_columns(table, exclude_columns: nil) click to toggle source
# File lib/dbx/differ.rb, line 106
def where_columns(table, exclude_columns: nil)
  exclude_columns ||= []
  DBX.connection do |conn|
    conn.columns(table).map do |column|
      header, type = column.name, column.type
      next if exclude_columns.include?(header)
      %((a.#{header} <> b.#{header}))
    end
  end.compact.join('OR')
end