class ReindeerETL::Sources::MultiSource

Public Class Methods

new(key, paths, opts={})
# File lib/reindeer-etl/sources/multi_source.rb, line 6
# Builds one source object per entry in +paths+.
#
# key   - stored as-is in @key
# paths - collection responding to +map+; each element is handed, alone,
#         to the source class's +new+
# opts  - options hash; +:klass+ picks the source class and falls back to
#         ReindeerETL::Sources::CSVSource when absent or falsy
def initialize(key, paths, opts = {})
    source_class = opts[:klass] || ReindeerETL::Sources::CSVSource
    @klass = source_class
    @key = key
    @sources = paths.map { |path| @klass.new(path) }
end

Public Instance Methods

each() { |row| ... }
# File lib/reindeer-etl/sources/multi_source.rb, line 14
# Joins the rows of all sources on @key and yields each resulting row.
#
# Rows of the first source seed the result in order. Every row of a later
# source is merged (via +merge+) into the first seeded row whose @key value
# is +==+ to its own. Keys are collected from the first row of each source
# only; before a row is yielded, any collected key it lacks is set to +nil+.
# Rows of the first source that never get merged are filled in place.
#
# Returns an Enumerator when called without a block; otherwise returns the
# array of joined rows after yielding each of them.
#
# Raises ReindeerETL::Errors::RecordInvalid when the first row of a source
# does not contain @key, or when a row of a later source cannot be joined
# (no seeded row has the same @key value, or +merge+ raises TypeError).
def each
    return enum_for(:each) unless block_given?

    # Built lazily so the message is only formatted on the failure path.
    join_error = lambda do |src_no, row_no|
        ReindeerETL::Errors::RecordInvalid.new("Unable to Join source##{src_no} - row##{row_no}")
    end

    rows = []
    all_keys = Set.new
    @sources.each_with_index do |source, source_idx|
        idx = 0
        source.each do |row|
            if idx.zero?
                # Only the first row of each source is inspected for its keys.
                keys = row.keys
                all_keys.merge(keys)
                unless keys.include?(@key)
                    # Sources are numbered from 1 here, so the first one stays "Path#1".
                    raise ReindeerETL::Errors::RecordInvalid, "Path##{source_idx + 1} missing key: #{@key}"
                end
            end

            if source_idx.zero?
                # The first source defines which rows exist and their order.
                rows.push row
            else
                rindex = rows.index { |arow| arow[@key] == row[@key] }
                raise join_error.call(source_idx, idx) if rindex.nil?

                begin
                    rows[rindex] = rows[rindex].merge(row)
                rescue TypeError
                    raise join_error.call(source_idx, idx)
                end
            end
            idx += 1
        end
    end

    rows.each do |row|
        (all_keys - row.keys).each { |k| row[k] = nil }
        yield row
    end
end