class CSVImporter::CSVReader

Reads, sanitizes and parses a CSV file

Constants

SEPARATORS

Public Instance Methods

csv_rows() click to toggle source
# File lib/csv_importer/csv_reader.rb, line 13
# Returns every parsed row (header row included) as an Array of Arrays of
# Strings. The result is memoized in @csv_rows.
#
# The raw content is sanitized, the column separator is detected from it,
# blank lines are skipped while parsing, and every cell is then re-encoded
# and stripped.
def csv_rows
  return @csv_rows if @csv_rows

  text = sanitize_content(read_content)
  parse_options = {
    col_sep: detect_separator(text),
    quote_char: quote_char,
    skip_blanks: true,
    external_encoding: source_encoding
  }
  parsed = CSV.parse(text, **parse_options)

  @csv_rows = sanitize_cells(encode_cells(parsed))
end
header() click to toggle source

Returns the header as an Array of Strings

# File lib/csv_importer/csv_reader.rb, line 27
# Returns the first parsed row, memoized in @header.
def header
  return @header if @header

  @header = csv_rows.first
end
rows() click to toggle source

Returns the rows as an Array of Arrays of Strings

# File lib/csv_importer/csv_reader.rb, line 32
# Returns every parsed row after the header row, memoized in @rows.
#
# Always returns an Array: when there are no parsed rows at all the result is
# an empty Array rather than nil.
def rows
  # drop(1) yields [] for an empty Array, whereas [1..-1] would yield nil
  @rows ||= csv_rows.drop(1)
end

Private Instance Methods

detect_separator(csv_content) click to toggle source
# File lib/csv_importer/csv_reader.rb, line 58
# Picks the entry of SEPARATORS whose number of occurrences per line is the
# most consistent with its number of occurrences on the first line.
#
# Each candidate is scored by summing, over all lines, the absolute
# difference between that line's count and the first line's count; the
# lowest score wins. A candidate that does not appear on the first line
# scores Float::MAX. For empty content every candidate scores Float::MAX
# instead of raising NoMethodError.
def detect_separator(csv_content)
  all_lines = csv_content.lines
  first_line = all_lines.first.to_s # all_lines.first is nil for empty content

  SEPARATORS.min_by do |separator|
    base_number = first_line.count(separator)

    if base_number.zero?
      Float::MAX
    else
      all_lines.inject(0) do |total, line|
        total + (line.count(separator) - base_number).abs
      end
    end
  end
end
encode_cells(rows) click to toggle source
# File lib/csv_importer/csv_reader.rb, line 82
# Returns a copy of +rows+ in which every cell has been encoded to
# target_encoding; falsy cells (e.g. nil) become "".
def encode_cells(rows)
  rows.map do |row|
    row.map do |value|
      next "" unless value

      value.encode(target_encoding)
    end
  end
end
read_content() click to toggle source
# File lib/csv_importer/csv_reader.rb, line 38
# Returns the raw CSV text from the first available source, checked in this
# order: +content+, then +file+ (via #read), then +path+.
#
# Raises Error when none of the three is set.
def read_content
  if content
    content
  elsif file
    file.read
  elsif path
    # File.read closes the handle; File.open(path).read left it open
    File.read(path)
  else
    raise Error, "Please provide content, file, or path"
  end
end
sanitize_cells(rows) click to toggle source

Removes leading and trailing whitespace from every cell and ensures each cell is always a String

# File lib/csv_importer/csv_reader.rb, line 74
# Returns a copy of +rows+ in which every cell has had its leading and
# trailing whitespace stripped; falsy cells (e.g. nil) become "".
def sanitize_cells(rows)
  rows.map do |row|
    row.map do |value|
      next "" unless value

      value.strip
    end
  end
end
sanitize_content(csv_content) click to toggle source
# File lib/csv_importer/csv_reader.rb, line 50
# Encodes +csv_content+ to the source encoding, replacing invalid or
# undefined byte sequences with an empty string, then normalizes
# carriage-return based line endings to "\n".
def sanitize_content(csv_content)
  encode_options = { invalid: :replace, undef: :replace, replace: '' }
  cleaned = csv_content.encode(Encoding.find(source_encoding), **encode_options)

  # Matches "\r", "\r\r", "\r\n" and "\r\r\n"
  cleaned.gsub(/\r\r?\n?/, "\n")
end
source_encoding() click to toggle source
# File lib/csv_importer/csv_reader.rb, line 90
# Returns the part of +encoding+ before the first ':' separator, or 'UTF-8'
# when splitting +encoding+ yields no parts.
def source_encoding
  parts = encoding.split(':')
  parts.empty? ? 'UTF-8' : parts.first
end
target_encoding() click to toggle source
# File lib/csv_importer/csv_reader.rb, line 94
# Returns the part of +encoding+ after the last ':' separator, or 'UTF-8'
# when splitting +encoding+ yields no parts.
def target_encoding
  parts = encoding.split(':')
  parts.empty? ? 'UTF-8' : parts.last
end