class CsvOrm::Ingestor

Attributes

data_set[RW]
file[RW]
headers[RW]
headers_defined[RW]
options[RW]
path[RW]

Public Class Methods

build(path, options) click to toggle source
# File lib/csv_orm/ingestor.rb, line 15
# Convenience constructor: creates an ingestor for +path+, parses the file,
# and wraps the parsed rows in a Query.
#
# path    - location of the CSV file, handed to the constructor unchanged.
# options - options value, handed to the constructor unchanged.
#
# Returns the new Query built from the result of #parse.
def self.build(path, options)
  ingestor = new(path, options)
  Query.new(ingestor.parse)
end
new(file_path, options={}) click to toggle source
# File lib/csv_orm/ingestor.rb, line 5
# Sets up an ingestor for the CSV file at +file_path+.
#
# The path is expanded to an absolute path and the file is opened right away;
# nothing in this method closes the handle.
#
# file_path - path to the CSV file; passed through File.expand_path.
# options   - Hash of settings (default: {}), kept in @options:
#             :smart   - stored in @smart; treated as on unless the value is
#                        exactly +false+.
#             :logging - stored in @logging as given.
#
# Raises whatever File.open raises when the file cannot be opened.
def initialize(file_path, options={})
  @path            = File.expand_path(file_path)
  @file            = File.open(path)
  @options         = options # previously never assigned, so the options attribute stayed nil
  @headers         = [] # will define in first iteration of loop
  @headers_defined = false
  @data_set        = []
  @smart           = options[:smart] != false # nil / missing key counts as enabled
  @logging         = options[:logging]
end

Public Instance Methods

confidence_in_numeric?(string) click to toggle source
# File lib/csv_orm/ingestor.rb, line 50
# Reports whether +string+ reads as a number.
#
# A value made up only of digits is accepted immediately; anything else is
# accepted when Kernel#Float can convert it.
#
# string - the value to test.
#
# Returns true when the value looks numeric, false otherwise (including when
# the Float conversion raises).
def confidence_in_numeric?(string)
  return true if string =~ /\A\d+\Z/

  begin
    Float(string)
    true
  rescue StandardError
    false
  end
end
infer_data_type(field) click to toggle source
# File lib/csv_orm/ingestor.rb, line 36
# Converts a raw field into the value stored for a row.
#
# With @smart off the field is simply stringified. With @smart on:
#   * a field that confidence_in_numeric? accepts is returned untouched
#     (numeric-looking values are deliberately never treated as times);
#   * a field DateTime.parse understands becomes integer epoch seconds;
#   * anything else is stringified.
#
# field - the raw cell value.
#
# Returns the field itself, an Integer timestamp, or a String.
def infer_data_type(field)
  if @smart
    return field if confidence_in_numeric?(field)

    # Any parse failure just means "not a date".
    parsed = begin
      DateTime.parse(field)
    rescue StandardError
      nil
    end
    return parsed.to_time.to_i if parsed
  end

  field.to_s
end
parse() click to toggle source
# File lib/csv_orm/ingestor.rb, line 20
# Reads every row of the CSV in +file+ and appends one OpenStruct per data
# row to @data_set.
#
# The first row seen is taken as the header row: each header has spaces and
# hyphens replaced with underscores, is downcased and turned into a Symbol.
# Every later row has its fields run through infer_data_type and is zipped
# with the headers. Header cells are not type-inferred.
#
# When @logging is truthy a line is printed per row plus a final timing line.
# The file handle is not closed here.
#
# Returns @data_set.
def parse
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  CSV.parse(file) do |row|
    if @headers_defined
      parsed_row = row.map { |field| infer_data_type(field) }
      @data_set << OpenStruct.new(Hash[headers.zip(parsed_row)])
    else
      @headers = row.map { |header| header.gsub(/[ -]/, '_').downcase.to_sym }
      @headers_defined = true
    end
    puts "Parsed row #{$.}" if @logging
  end
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  puts "Parsed #{@file.path} in #{elapsed} seconds" if @logging
  @data_set
end