class ReindeerETL::Transforms::Recode
Attributes
cols[RW]
Public Class Methods
new(opts={})
click to toggle source
# File lib/reindeer-etl/transforms/recode.rb, line 5
#
# Builds a recode transform from an options hash.
#
# Recognised keys in +opts+:
#   :cols       - enumerable of column names to recode. When the key is
#                 omitted the column set stays nil so that #process can
#                 derive it from the first row it sees.
#   :except     - columns to leave out when the column set is derived.
#   :codes      - hash mapping original values to their replacements
#                 (required, must not be empty).
#   :ignore     - values that are accepted as-is without being recoded.
#   :ignore_all - when truthy, unknown values never raise.
#
# Raises ArgumentError when :cols is supplied but nil or empty, or when
# :codes is missing or empty.
def initialize(opts = {})
  @cols = opts[:cols]
  @except = (opts[:except] || []).to_set
  @codes = opts[:codes] || {}
  @ignore_vals = opts[:ignore] || []
  @ignore_all = opts[:ignore_all] || false
  @error_on_unknown = !@ignore_all

  # Normalise to a Set first so the emptiness check works for any
  # enumerable that responds to #to_set.
  @cols = @cols.to_set unless @cols.nil?

  # An explicitly supplied :cols must name at least one column; an empty
  # list would otherwise produce a transform that silently recodes nothing.
  if opts.key?(:cols) && (@cols.nil? || @cols.empty?)
    raise ArgumentError, ':cols array is empty'
  end

  raise ArgumentError, ':codes hash is empty' if @codes.empty?

  # Every value that may legitimately appear in a recoded column.
  @acceptable_keys = (@codes.keys + @ignore_vals).to_set
  @counter = 0
end
Public Instance Methods
process(row)
click to toggle source
# File lib/reindeer-etl/transforms/recode.rb, line 22
#
# Recodes the configured columns of +row+ in place and returns the row.
#
# If no column set has been fixed yet, it is taken from this row's keys
# minus the excluded columns and reused for every later row.
#
# Raises ReindeerETL::Errors::RecordInvalid when the row lacks any of the
# expected columns.
def process(row)
  present = row.keys.to_set
  @cols ||= present - @except

  # Every expected column has to exist in this row.
  absent = @cols - present
  unless absent.empty?
    raise ReindeerETL::Errors::RecordInvalid.new("Missing columns: #{absent.to_a}")
  end

  # Validate, then recode, each column's current value.
  @cols.each do |name|
    current = row[name]
    _validate_val(current)
    _update_row(row, name, current)
  end

  @counter += 1
  row
end
Private Instance Methods
_update_row(row, col, val)
click to toggle source
# File lib/reindeer-etl/transforms/recode.rb, line 52
#
# Writes the recoded value for +val+ into row[col].
#
# Values found in @acceptable_keys but absent from @codes are left
# untouched. Values outside @acceptable_keys raise
# ReindeerETL::Errors::RecordInvalid when @error_on_unknown is set and are
# otherwise skipped.
def _update_row(row, col, val)
  unless @acceptable_keys.include?(val)
    return unless @error_on_unknown

    raise ReindeerETL::Errors::RecordInvalid.new("Invalid value in recode: row# #{@counter} {#{col}:#{val}}")
  end

  row[col] = @codes[val] if @codes.key?(val)
end
_validate_val(val)
click to toggle source
# File lib/reindeer-etl/transforms/recode.rb, line 45
#
# Rejects a value that the transform does not recognise.
#
# A value is recognised when it is in @acceptable_keys, which the
# constructor builds from the keys of the codes hash plus the ignored
# values. Nothing is checked when @error_on_unknown is false.
#
# Returns nil. Raises ReindeerETL::Errors::RecordInvalid for an
# unrecognised value.
def _validate_val(val)
  # Must read the same instance variables the constructor assigns; a
  # misspelled or never-assigned variable evaluates to nil and would turn
  # this check into a silent no-op.
  return unless @error_on_unknown
  return if @acceptable_keys.include?(val)

  raise ReindeerETL::Errors::RecordInvalid, "Bad value: #{val}"
end