class CSVImporter::CSVReader
Reads, sanitizes, and parses a CSV file
Constants
- SEPARATORS
Public Instance Methods
csv_rows()
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 13
# Parses the input into a table of cells and caches the result.
#
# The content returned by read_content is passed through sanitize_content,
# its column separator is picked by detect_separator, and the text is handed
# to CSV.parse with blank lines skipped. The parsed cells then go through
# encode_cells and sanitize_cells.
#
# @return the cached value produced by sanitize_cells (rows of cell strings)
def csv_rows
  return @csv_rows if @csv_rows

  cleaned = sanitize_content(read_content)
  parsed = CSV.parse(
    cleaned,
    col_sep: detect_separator(cleaned),
    quote_char: quote_char,
    skip_blanks: true,
    external_encoding: source_encoding
  )

  @csv_rows = sanitize_cells(encode_cells(parsed))
end
header()
click to toggle source
Returns the header as an Array of Strings
# File lib/csv_importer/csv_reader.rb, line 27
# Returns the first entry of csv_rows (the header row), caching it after the
# first call. When csv_rows has no entries the result is nil.
def header
  return @header if @header

  @header = csv_rows.first
end
rows()
click to toggle source
Returns the rows as an Array of Arrays of Strings
# File lib/csv_importer/csv_reader.rb, line 32
# Returns every entry of csv_rows after the first one (the data rows),
# caching the result.
#
# Array#drop is used instead of csv_rows[1..-1] because slicing an empty
# array from index 1 yields nil; with drop an empty input gives [] so the
# method always returns an Array.
#
# @return [Array] the rows following the header; [] when there are none
def rows
  @rows ||= csv_rows.drop(1)
end
Private Instance Methods
detect_separator(csv_content)
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 58
# Picks the entry of SEPARATORS that best matches +csv_content+.
#
# Each candidate is scored by summing, over all lines, the absolute
# difference between its number of occurrences on that line and on the first
# line. The candidate with the lowest score wins. A candidate that does not
# occur on the first line is scored Float::MAX so it is only chosen when no
# candidate occurs there.
#
# Empty content has no first line; every candidate then scores Float::MAX
# and min_by returns the first one instead of raising NoMethodError.
#
# NOTE: String#count treats its argument as a set of characters, so the
# scoring assumes single-character separators.
#
# @param csv_content [String] the sanitized CSV text
# @return the selected element of SEPARATORS
def detect_separator(csv_content)
  # Split once; the lines do not depend on the candidate being scored.
  lines = csv_content.lines
  first_line = lines.first

  SEPARATORS.min_by do |separator|
    base_number = first_line ? first_line.count(separator) : 0

    if base_number.zero?
      Float::MAX
    else
      lines.inject(0) { |sum, line| sum + (line.count(separator) - base_number).abs }
    end
  end
end
encode_cells(rows)
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 82
# Re-encodes every cell to target_encoding.
#
# Falsy cells (such as nil) are replaced by an empty string; every other
# cell is converted with String#encode.
#
# @param rows [Array<Array>] rows of cells
# @return [Array<Array<String>>] a new table with the encoded cells
def encode_cells(rows)
  rows.map do |row|
    row.map do |value|
      next "" unless value

      value.encode(target_encoding)
    end
  end
end
read_content()
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 38
# Returns the raw CSV text from the first available source, checked in this
# order: +content+ (used as is), +file+ (its #read result), +path+ (the
# contents of the file at that path).
#
# The path branch uses File.read, which opens, reads and closes the file,
# rather than File.open(path).read, which leaves the handle open until it is
# garbage-collected.
#
# @return the CSV text
# @raise [Error] when none of content, file or path is set
def read_content
  return content if content
  return file.read if file
  return File.read(path) if path

  raise Error, "Please provide content, file, or path"
end
sanitize_cells(rows)
click to toggle source
Removes leading and trailing whitespace from every cell and ensures each cell is always a String
# File lib/csv_importer/csv_reader.rb, line 74
# Normalizes every cell to a stripped String.
#
# String#strip removes both leading and trailing whitespace. Falsy cells
# (such as nil) become an empty string.
#
# @param rows [Array<Array>] rows of cells
# @return [Array<Array<String>>] a new table with the cleaned cells
def sanitize_cells(rows)
  rows.map do |row|
    row.map { |value| (value || "").strip }
  end
end
sanitize_content(csv_content)
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 50
# Cleans raw CSV text before parsing.
#
# The text is encoded to source_encoding with invalid and undefined
# sequences replaced by nothing, then line endings are normalized.
#
# @param csv_content [String] the raw CSV text
# @return [String] the cleaned text using "\n" line endings
def sanitize_content(csv_content)
  wanted_encoding = Encoding.find(source_encoding)

  # Remove invalid byte sequences
  scrubbed = csv_content.encode(wanted_encoding, invalid: :replace, undef: :replace, replace: '')

  # Turn "\r\r\n", "\r\n" and a lone "\r" into "\n"
  scrubbed.gsub(/\r\r?\n?/, "\n")
end
source_encoding()
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 90
# Returns the part of +encoding+ before the first ':'.
# Falls back to 'UTF-8' when splitting +encoding+ yields no parts.
def source_encoding
  encoding.split(':').fetch(0, 'UTF-8')
end
target_encoding()
click to toggle source
# File lib/csv_importer/csv_reader.rb, line 94
# Returns the part of +encoding+ after the last ':' (the whole value when it
# contains no ':'). Falls back to 'UTF-8' when splitting +encoding+ yields no
# parts.
def target_encoding
  encoding.split(':').fetch(-1, 'UTF-8')
end