module BaseballCalc::Importer

The Importer is responsible for reading the CSV files and returning initialized objects to the BaseballCalc module. An alternative pattern would have been to make the Player and BattingSummary classes able to import themselves, but this approach prevents multiple passes over the CSV files and extracts all of the import logic into a single place.

Public Class Methods

has_data_errors?() click to toggle source
# File lib/baseball_calc/importer.rb, line 77
# Reports whether any of the error trackers populated during the import
# holds at least one entry.
#
# Returns true when one or more trackers are present, false otherwise.
def Importer.has_data_errors?
        error_trackers = [
                @dup_player_ids_in_pf,
                @row_nums_with_no_player_ids_in_pf,
                @row_nums_with_no_player_ids_in_bf,
                @row_nums_with_missing_batting_data_in_bf,
                @player_ids_with_no_batting_data
        ]
        error_trackers.any? { |tracker| tracker.present? }
end
import(player_file_path, batting_file_path) click to toggle source
# File lib/baseball_calc/importer.rb, line 8
# Runs the whole import: resets the module-level state, loads the players,
# loads the batting summaries, then attaches the summaries to the players.
#
# player_file_path  - path handed to Importer.import_players
# batting_file_path - path handed to Importer.import_batting_summaries
#
# Returns the @players hash (keyed by player id).
def Importer.import(player_file_path, batting_file_path)
        # Error trackers are cleared first so each import starts from a clean slate.
        @row_nums_with_no_player_ids_in_pf = []  # Row numbers with an empty playerID column in the player file
        @row_nums_with_no_player_ids_in_bf = []  # Row numbers with an empty playerID column in the batting file
        @row_nums_with_missing_batting_data_in_bf = []
        @player_ids_with_no_batting_data = []
        @dup_player_ids_in_pf = {}

        # Result containers filled by the import steps below.
        @batting_summaries = {}
        @players = {}

        Importer.import_players(player_file_path)
        Importer.import_batting_summaries(batting_file_path)
        Importer.set_batting_summaries

        @players
end
import_batting_summaries(batting_file_path) click to toggle source

Iterates over the Batting.csv file and creates a BattingSummary object for each record (if possible). Errors are noted for later display. $INPUT_LINE_NUMBER is available once Ruby's built-in 'English' library has been required.

# File lib/baseball_calc/importer.rb, line 47
# Reads the batting CSV (with a header row) and groups a BattingSummary for
# every row that has a playerID into @batting_summaries, keyed by playerID.
#
# Rows with a blank playerID are recorded in
# @row_nums_with_no_player_ids_in_bf; rows whose summary reports
# has_bad_batting_data? are recorded in
# @row_nums_with_missing_batting_data_in_bf (and are still stored).
#
# batting_file_path - path of the batting CSV file
#
# Returns nil.
def Importer.import_batting_summaries(batting_file_path)
        # CSV.foreach opens and closes the file itself, so no separate File
        # handle is opened here (the previous one was never read and leaked
        # whenever parsing raised). Options are passed without braces so they
        # are treated as options rather than as the positional mode argument.
        CSV.foreach(batting_file_path, :headers => true) do |row|
                data = row.to_hash
                player_id = data["playerID"]

                if player_id.blank?
                        @row_nums_with_no_player_ids_in_bf.push($INPUT_LINE_NUMBER)
                else
                        # NOTE(review): player is nil when the batting file references a
                        # playerID missing from the player file - confirm BattingSummary copes.
                        player = @players[player_id]
                        batting_summary = BattingSummary.new(player, data)
                        @row_nums_with_missing_batting_data_in_bf.push($INPUT_LINE_NUMBER) if batting_summary.has_bad_batting_data?
                        (@batting_summaries[player_id] ||= []).push(batting_summary)
                end
        end

        nil
end
import_players(player_file_path) click to toggle source

Iterates over the Master-small.csv file and creates a Player object for each record (if possible). Errors are noted for later display. $INPUT_LINE_NUMBER is available once Ruby's built-in 'English' library has been required.

# File lib/baseball_calc/importer.rb, line 27
# Reads the player CSV (with a header row) and stores a Player for every row
# that has a player id into @players, keyed by that id.
#
# Rows with a blank player id are recorded in
# @row_nums_with_no_player_ids_in_pf. When an id was already seen it is
# recorded in @dup_player_ids_in_pf and the later row replaces the earlier
# player in @players.
#
# player_file_path - path of the player CSV file
#
# Returns nil.
def Importer.import_players(player_file_path)
        # CSV.foreach opens and closes the file itself, so no separate File
        # handle is opened here (the previous one was never read and leaked
        # whenever parsing raised). Options are passed without braces so they
        # are treated as options rather than as the positional mode argument.
        CSV.foreach(player_file_path, :headers => true) do |row|
                player = Player.new(row.to_hash)
                player_id = player.player_id

                if player_id.blank?
                        @row_nums_with_no_player_ids_in_pf.push($INPUT_LINE_NUMBER)
                else
                        # Only the key matters for duplicates; the value is a placeholder.
                        @dup_player_ids_in_pf[player_id] = nil if @players.key?(player_id)
                        @players[player_id] = player
                end
        end

        nil
end
set_batting_summaries() click to toggle source

Connects a list of batting_summaries to their respective player object.

# File lib/baseball_calc/importer.rb, line 67
# Hands each player the list of batting summaries collected under its id.
# Ids that have no entry in @batting_summaries are appended to
# @player_ids_with_no_batting_data instead.
def Importer.set_batting_summaries
        @players.each_pair do |id, current_player|
                unless @batting_summaries.key?(id)
                        @player_ids_with_no_batting_data.push(id)
                        next
                end

                current_player.batting_summaries = @batting_summaries[id]
        end
end
write_batting_data_errors_header(file_path) click to toggle source
# File lib/baseball_calc/importer.rb, line 134
# Prints the "Issues in <file name>" header for the batting file, but only
# when at least one batting-related error tracker has entries.
#
# file_path - path of the batting file; only its basename is printed
def Importer.write_batting_data_errors_header(file_path)
        # All three trackers must be blank to skip the header. The last check
        # previously lacked `.blank?`, so an (always truthy) array suppressed the
        # header even when players without batting data had been found.
        return if @row_nums_with_no_player_ids_in_bf.blank? &&
                  @row_nums_with_missing_batting_data_in_bf.blank? &&
                  @player_ids_with_no_batting_data.blank?

        $stdout.puts  " "
        $stdout.puts  "   Issues in #{File.basename(file_path)}".colorize(BaseballCalc::ERROR_COLOR)
end
write_dup_player_ids_in_pf_errors(limit) click to toggle source
# File lib/baseball_calc/importer.rb, line 126
# Prints how many playerIDs occur more than once in the player file,
# listing at most `limit` of them.
#
# limit - maximum number of playerIDs to list in the message
def Importer.write_dup_player_ids_in_pf_errors(limit)
        return if @dup_player_ids_in_pf.blank?

        # @dup_player_ids_in_pf is a Hash, so it cannot be sliced with a range
        # (hash[0..n] is a key lookup and returns nil). Take the first `limit`
        # keys instead; when there are fewer keys, all of them are returned.
        dup_ids = @dup_player_ids_in_pf.keys
        reduced = dup_ids.first(limit)
        str = dup_ids.size > (limit-1) ? "The first #{limit} playerIDs are " : ""
        $stdout.puts  "    - #{dup_ids.size} playerIDs have been used for multiple players.  #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
write_errors(player_file_path, batting_file_path, limit) click to toggle source

Main method for writing the data error notice

# File lib/baseball_calc/importer.rb, line 83
# Prints the complete data-problem report, or nothing at all when
# Importer.has_data_errors? is false.
#
# player_file_path  - passed to the player-file header writer
# batting_file_path - passed to the batting-file header writer
# limit             - passed to every detail writer
def Importer.write_errors(player_file_path, batting_file_path, limit)
        return unless Importer.has_data_errors?

        Importer.write_problems_notice_start

        # Player file section
        Importer.write_player_data_errors_header(player_file_path)
        Importer.write_row_nums_with_no_player_ids_in_pf_errors(limit)
        Importer.write_dup_player_ids_in_pf_errors(limit)

        # Batting file section
        Importer.write_batting_data_errors_header(batting_file_path)
        Importer.write_row_nums_with_no_player_ids_in_bf_errors(limit)
        Importer.write_row_nums_with_missing_batting_data_in_bf_errors(limit)
        Importer.write_player_ids_with_no_batting_data_errors(limit)

        Importer.write_problems_notice_end
end
write_player_data_errors_header(file_path) click to toggle source
# File lib/baseball_calc/importer.rb, line 110
# Prints a blank line followed by the "Issues in <file name>" header for the
# player file, provided at least one player-file error tracker has entries.
#
# file_path - path of the player file; only its basename is printed
def Importer.write_player_data_errors_header(file_path)
        has_player_file_errors = @row_nums_with_no_player_ids_in_pf.present? || @dup_player_ids_in_pf.present?
        return unless has_player_file_errors

        header = "   Issues in #{File.basename(file_path)}".colorize(BaseballCalc::ERROR_COLOR)
        $stdout.puts ""
        $stdout.puts header
end
write_player_ids_with_no_batting_data_errors(limit) click to toggle source
# File lib/baseball_calc/importer.rb, line 157
# Prints how many playerIDs ended up without any batting data, listing the
# ids returned by up_to_limit(limit).
#
# limit - passed to up_to_limit and quoted in the message
def Importer.write_player_ids_with_no_batting_data_errors(limit)
        return if @player_ids_with_no_batting_data.blank?

        # NOTE(review): up_to_limit is not defined in this file - presumably a
        # project extension returning at most `limit` elements; confirm.
        reduced = @player_ids_with_no_batting_data.up_to_limit(limit)
        str = @player_ids_with_no_batting_data.size > (limit-1) ? "The first #{limit} playerIDs are " : ""
        # Message typo fixed: "battting" -> "batting".
        $stdout.puts  "    - #{@player_ids_with_no_batting_data.size} playerIDs do not have any batting data.  #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
write_problems_notice_end() click to toggle source
# File lib/baseball_calc/importer.rb, line 105
# Closes the data-problem report: a blank line, then a row of asterisks in
# the error color.
def Importer.write_problems_notice_end
        closing_rule = "*******************************************************************************************".colorize(BaseballCalc::ERROR_COLOR)

        $stdout.puts ""
        $stdout.puts closing_rule
end
write_problems_notice_start() click to toggle source
# File lib/baseball_calc/importer.rb, line 100
# Opens the data-problem report: a row of asterisks followed by the NOTICE
# line, both in the error color.
def Importer.write_problems_notice_start
        opening_rule = "*******************************************************************************************".colorize(BaseballCalc::ERROR_COLOR)
        notice = "NOTICE: Problems have been found with the DATA which may impact the accuracy of the results.".colorize(BaseballCalc::ERROR_COLOR)

        $stdout.puts opening_rule
        $stdout.puts notice
end
write_row_nums_with_missing_batting_data_in_bf_errors(limit) click to toggle source
# File lib/baseball_calc/importer.rb, line 149
# Prints how many batting-file rows were flagged as having missing data,
# listing the row numbers returned by up_to_limit(limit).
#
# limit - passed to up_to_limit and quoted in the message
def Importer.write_row_nums_with_missing_batting_data_in_bf_errors(limit)
        bad_rows = @row_nums_with_missing_batting_data_in_bf
        return if bad_rows.blank?

        # NOTE(review): up_to_limit is not defined in this file - presumably a
        # project extension returning at most `limit` elements; confirm.
        shown_rows = bad_rows.up_to_limit(limit)
        prefix = if bad_rows.size >= limit
                "The first #{limit} row numbers are "
        else
                ""
        end

        message = "    - #{bad_rows.size} rows have missing data.  #{prefix}[#{shown_rows.to_sentence}]"
        $stdout.puts message.colorize(BaseballCalc::ERROR_COLOR)
end
write_row_nums_with_no_player_ids_in_bf_errors(limit) click to toggle source
# File lib/baseball_calc/importer.rb, line 141
# Prints how many batting-file rows have a blank playerID, listing the row
# numbers returned by up_to_limit(limit).
#
# limit - passed to up_to_limit and quoted in the message
def Importer.write_row_nums_with_no_player_ids_in_bf_errors(limit)
        return if @row_nums_with_no_player_ids_in_bf.blank?

        # NOTE(review): up_to_limit is not defined in this file - presumably a
        # project extension returning at most `limit` elements; confirm.
        reduced = @row_nums_with_no_player_ids_in_bf.up_to_limit(limit)
        str = @row_nums_with_no_player_ids_in_bf.size > (limit-1) ? "The first #{limit} row numbers are " : ""
        # Wording aligned with the player-file message ("rows have", not "row have").
        $stdout.puts  "    - #{@row_nums_with_no_player_ids_in_bf.size} rows have blank playerIDs.  #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
write_row_nums_with_no_player_ids_in_pf_errors(limit) click to toggle source
# File lib/baseball_calc/importer.rb, line 117
# Prints how many player-file rows have a blank playerID, listing the row
# numbers returned by up_to_limit(limit).
#
# limit - passed to up_to_limit and quoted in the message
def Importer.write_row_nums_with_no_player_ids_in_pf_errors(limit)
        return if @row_nums_with_no_player_ids_in_pf.blank?

        # NOTE(review): up_to_limit is not defined in this file - presumably a
        # project extension returning at most `limit` elements; confirm.
        reduced = @row_nums_with_no_player_ids_in_pf.up_to_limit(limit)
        str = @row_nums_with_no_player_ids_in_pf.size > (limit-1) ? "The first #{limit} row numbers are " : ""
        # Written through $stdout explicitly, like every other writer in this module.
        $stdout.puts  "    - #{@row_nums_with_no_player_ids_in_pf.size} rows have blank playerIDs.  #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end