module BaseballCalc::Importer
The Importer
is responsible for reading the CSV files and returning initialized objects to the BaseballCalc
module. An alternative pattern would have been to make the Player
and BattingSummary
classes able to import themselves, but this approach prevents multiple passes over the CSV files and extracts all of the import logic into a single place.
Public Class Methods
# File lib/baseball_calc/importer.rb, line 77
# Reports whether any of the importer's error-tracking collections is
# non-blank.
#
# Returns true if at least one collection is present, false otherwise.
def Importer.has_data_errors?
  [
    @dup_player_ids_in_pf,
    @row_nums_with_no_player_ids_in_pf,
    @row_nums_with_no_player_ids_in_bf,
    @row_nums_with_missing_batting_data_in_bf,
    @player_ids_with_no_batting_data
  ].any?(&:present?)
end
# File lib/baseball_calc/importer.rb, line 8
# Resets all importer state, reads the player file and then the batting
# file, and attaches the batting summaries to their players.
#
# player_file_path  - path handed to import_players
# batting_file_path - path handed to import_batting_summaries
#
# Returns the @players hash (keyed by player id).
def Importer.import(player_file_path, batting_file_path)
  # Imported objects, both keyed by player id.
  @players = {}
  @batting_summaries = {}

  # Problems found in the player file.
  @dup_player_ids_in_pf = {}
  @row_nums_with_no_player_ids_in_pf = [] # Row numbers with an empty playerID column in the player file

  # Problems found in the batting file.
  @row_nums_with_no_player_ids_in_bf = [] # Row numbers with an empty playerID column in the batting file
  @row_nums_with_missing_batting_data_in_bf = []
  @player_ids_with_no_batting_data = []

  # Players must be loaded first: the batting import looks them up by id.
  import_players(player_file_path)
  import_batting_summaries(batting_file_path)
  set_batting_summaries

  @players
end
Iterates over the Batting.csv file and creates a BattingSummary
object for each record (if possible). Errors are noted for later display. $INPUT_LINE_NUMBER is available after requiring the built-in ‘English’ library
# File lib/baseball_calc/importer.rb, line 47
# Reads the batting CSV (first row treated as headers) and builds one
# BattingSummary per row that has a playerID, grouping the summaries by
# player id in @batting_summaries.
#
# Rows with a blank playerID are recorded in
# @row_nums_with_no_player_ids_in_bf; rows whose summary reports
# has_bad_batting_data? are recorded in
# @row_nums_with_missing_batting_data_in_bf (and are still stored).
#
# batting_file_path - path of the batting CSV file
#
# Returns nil.
def Importer.import_batting_summaries(batting_file_path)
  # CSV.foreach opens and closes the file itself, so no separate File.open
  # handle is needed (the old one was never read and leaked on exceptions).
  # Options are passed as keywords so the call also works on Ruby 3.
  CSV.foreach(batting_file_path, headers: true) do |row|
    data = row.to_hash
    player_id = data["playerID"]

    if player_id.blank?
      @row_nums_with_no_player_ids_in_bf.push($INPUT_LINE_NUMBER)
      next
    end

    # @players[player_id] is nil when the batting row references an id that
    # was not loaded from the player file; BattingSummary receives it as-is.
    batting_summary = BattingSummary.new(@players[player_id], data)
    @row_nums_with_missing_batting_data_in_bf.push($INPUT_LINE_NUMBER) if batting_summary.has_bad_batting_data?

    (@batting_summaries[player_id] ||= []).push(batting_summary)
  end

  nil
end
Iterates over the Master-small.csv file and creates a Player
object for each record (if possible). Errors are noted for later display. $INPUT_LINE_NUMBER is available after requiring the built-in ‘English’ library
# File lib/baseball_calc/importer.rb, line 27
# Reads the player CSV (first row treated as headers) and stores one Player
# per row in @players, keyed by player id.
#
# Rows whose player has a blank id are recorded in
# @row_nums_with_no_player_ids_in_pf and skipped. When an id is seen more
# than once it is recorded as a key of @dup_player_ids_in_pf and the later
# row replaces the earlier one in @players.
#
# player_file_path - path of the player CSV file
#
# Returns nil.
def Importer.import_players(player_file_path)
  # CSV.foreach opens and closes the file itself, so no separate File.open
  # handle is needed (the old one was never read and leaked on exceptions).
  # Options are passed as keywords so the call also works on Ruby 3.
  CSV.foreach(player_file_path, headers: true) do |row|
    player = Player.new(row.to_hash)
    player_id = player.player_id

    if player_id.blank?
      @row_nums_with_no_player_ids_in_pf.push($INPUT_LINE_NUMBER)
    else
      # The hash is used as a set: only its keys matter.
      @dup_player_ids_in_pf[player_id] = nil if @players.key?(player_id)
      @players[player_id] = player
    end
  end

  nil
end
Connects a list of batting_summaries to their respective player object.
# File lib/baseball_calc/importer.rb, line 67
# Walks every imported player and hands it the list of batting summaries
# collected under its id. Ids that have no entry in @batting_summaries are
# appended to @player_ids_with_no_batting_data instead.
def Importer.set_batting_summaries
  @players.each_pair do |id, player|
    unless @batting_summaries.key?(id)
      @player_ids_with_no_batting_data << id
      next
    end

    player.batting_summaries = @batting_summaries[id]
  end
end
# File lib/baseball_calc/importer.rb, line 134
# Prints the "Issues in <file>" header for the batting file, but only when
# at least one batting-related problem list is non-blank.
#
# file_path - path of the batting file; only its basename is printed
def Importer.write_batting_data_errors_header(file_path)
  # All three lists must be blank to skip the header. The third check used to
  # lack `.blank?`, and since an Array is always truthy the header was
  # dropped whenever the only problem was players without batting data.
  return if @row_nums_with_no_player_ids_in_bf.blank? &&
            @row_nums_with_missing_batting_data_in_bf.blank? &&
            @player_ids_with_no_batting_data.blank?

  $stdout.puts " "
  $stdout.puts " Issues in #{File.basename(file_path)}".colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 126
# Prints how many player ids were used for more than one player, followed by
# the ids themselves (at most `limit` of them).
#
# limit - maximum number of ids to list in the message
def Importer.write_dup_player_ids_in_pf_errors(limit)
  return if @dup_player_ids_in_pf.blank?

  # @dup_player_ids_in_pf is a Hash used as a set, so the ids are its keys.
  # Indexing the Hash with a Range (the previous code) is a key lookup that
  # returns nil and crashed on `.keys`; slice the key list instead.
  dup_ids = @dup_player_ids_in_pf.keys
  shown = dup_ids.first(limit)
  str = dup_ids.size > (limit - 1) ? "The first #{limit} playerIDs are " : ""

  $stdout.puts " - #{dup_ids.size} playerIDs have been used for multiple players. #{str}[#{shown.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
Main method for writing the data error notice
# File lib/baseball_calc/importer.rb, line 83
# Entry point for the data error notice. Does nothing when no data errors
# were recorded; otherwise prints the opening banner, the player-file
# section, the batting-file section and the closing banner, in that order.
#
# player_file_path  - path shown in the player-file section header
# batting_file_path - path shown in the batting-file section header
# limit             - passed through to each per-problem writer
def Importer.write_errors(player_file_path, batting_file_path, limit)
  return unless has_data_errors?

  write_problems_notice_start

  # Player file section.
  write_player_data_errors_header(player_file_path)
  write_row_nums_with_no_player_ids_in_pf_errors(limit)
  write_dup_player_ids_in_pf_errors(limit)

  # Batting file section.
  write_batting_data_errors_header(batting_file_path)
  write_row_nums_with_no_player_ids_in_bf_errors(limit)
  write_row_nums_with_missing_batting_data_in_bf_errors(limit)
  write_player_ids_with_no_batting_data_errors(limit)

  write_problems_notice_end
end
# File lib/baseball_calc/importer.rb, line 110
# Prints the "Issues in <file>" header for the player file, but only when a
# player-file problem list is non-blank.
#
# file_path - path of the player file; only its basename is printed
def Importer.write_player_data_errors_header(file_path)
  player_file_clean = @row_nums_with_no_player_ids_in_pf.blank? && @dup_player_ids_in_pf.blank?
  return if player_file_clean

  heading = " Issues in #{File.basename(file_path)}"
  $stdout.puts ""
  $stdout.puts heading.colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 157
# Prints how many player ids have no batting data, followed by a list of
# those ids.
#
# limit - passed to up_to_limit and used in the "The first N" wording
def Importer.write_player_ids_with_no_batting_data_errors(limit)
  return if @player_ids_with_no_batting_data.blank?

  # NOTE(review): up_to_limit is not defined in this view; presumably it
  # returns at most `limit` leading elements - confirm against its definition.
  reduced = @player_ids_with_no_batting_data.up_to_limit(limit)
  str = @player_ids_with_no_batting_data.size > (limit - 1) ? "The first #{limit} playerIDs are " : ""

  # Message typo fixed: "battting" -> "batting".
  $stdout.puts " - #{@player_ids_with_no_batting_data.size} playerIDs do not have any batting data. #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 105
# Closes the data error notice: an empty line followed by a colorized rule
# of asterisks.
def Importer.write_problems_notice_end
  rule = "*******************************************************************************************"

  $stdout.puts ""
  $stdout.puts rule.colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 100
# Opens the data error notice: a colorized rule of asterisks followed by the
# colorized NOTICE line.
def Importer.write_problems_notice_start
  banner_lines = [
    "*******************************************************************************************",
    "NOTICE: Problems have been found with the DATA which may impact the accuracy of the results."
  ]

  banner_lines.each { |line| $stdout.puts line.colorize(BaseballCalc::ERROR_COLOR) }
end
# File lib/baseball_calc/importer.rb, line 149
# Prints how many batting-file rows have missing data, followed by a list of
# the affected row numbers.
#
# limit - passed to up_to_limit and used in the "The first N" wording
def Importer.write_row_nums_with_missing_batting_data_in_bf_errors(limit)
  bad_rows = @row_nums_with_missing_batting_data_in_bf
  return if bad_rows.blank?

  # NOTE(review): up_to_limit is defined outside this view; presumably it
  # truncates the list to `limit` entries - confirm.
  shown = bad_rows.up_to_limit(limit)
  prefix = if bad_rows.size > (limit - 1)
             "The first #{limit} row numbers are "
           else
             ""
           end

  message = " - #{bad_rows.size} rows have missing data. #{prefix}[#{shown.to_sentence}]"
  $stdout.puts message.colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 141
# Prints how many batting-file rows have a blank playerID, followed by a
# list of the affected row numbers.
#
# limit - passed to up_to_limit and used in the "The first N" wording
def Importer.write_row_nums_with_no_player_ids_in_bf_errors(limit)
  return if @row_nums_with_no_player_ids_in_bf.blank?

  # NOTE(review): up_to_limit is defined outside this view; presumably it
  # truncates the list to `limit` entries - confirm.
  reduced = @row_nums_with_no_player_ids_in_bf.up_to_limit(limit)
  str = @row_nums_with_no_player_ids_in_bf.size > (limit - 1) ? "The first #{limit} row numbers are " : ""

  # Wording fixed ("row have" -> "rows have") to match the player-file message.
  $stdout.puts " - #{@row_nums_with_no_player_ids_in_bf.size} rows have blank playerIDs. #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end
# File lib/baseball_calc/importer.rb, line 117
# Prints how many player-file rows have a blank playerID, followed by a list
# of the affected row numbers.
#
# limit - passed to up_to_limit and used in the "The first N" wording
def Importer.write_row_nums_with_no_player_ids_in_pf_errors(limit)
  return if @row_nums_with_no_player_ids_in_pf.blank?

  # NOTE(review): up_to_limit is defined outside this view; presumably it
  # truncates the list to `limit` entries - confirm.
  reduced = @row_nums_with_no_player_ids_in_pf.up_to_limit(limit)
  str = @row_nums_with_no_player_ids_in_pf.size > (limit - 1) ? "The first #{limit} row numbers are " : ""

  # Written through $stdout explicitly, like every other writer in this module.
  $stdout.puts " - #{@row_nums_with_no_player_ids_in_pf.size} rows have blank playerIDs. #{str}[#{reduced.to_sentence}]".colorize(BaseballCalc::ERROR_COLOR)
end