class DataKit::CSV::SchemaAnalysis

Attributes

fields[R]
row_count[R]
sample_count[R]
type_hints[R]
types[R]
use_type_hints[R]

Public Class Methods

new(fields, options = {}) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 12
# Builds an empty analysis for the given field names.
#
# fields  - collection of field names; each gets its own zeroed tally.
# options - hash; only :use_type_hints is read here. Hints are enabled when
#           the option is absent (nil) or exactly true, and disabled for any
#           other value.
#
# Every field starts with a :string hint and a counter of 0 for each entry of
# Dataset::Field::Types (defined elsewhere in the project).
def initialize(fields, options = {})
  @fields = fields
  @row_count = 0
  @sample_count = 0

  hint_option = options[:use_type_hints]
  @use_type_hints = hint_option.nil? || hint_option == true

  @types = {}
  @type_hints = {}

  fields.each do |name|
    @type_hints[name] = :string
    @types[name] = {}
    Dataset::Field::Types.each { |kind| @types[name][kind] = 0 }
  end
end

Public Instance Methods

field_types() click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 52
# Maps every field name to the type resolved for it by #type?.
#
# Returns a new Hash of field name => type, in the iteration order of #fields.
def field_types
  fields.each_with_object({}) do |name, resolved|
    resolved[name] = type?(name)
  end
end
has_only_numeric_types?(field) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 83
# True when every type observed for +field+ is :integer, :number or :null.
#
# A field with no observed types at all also yields true, because nothing
# non-numeric remains after the subtraction.
def has_only_numeric_types?(field)
  non_numeric = type_list(field) - [:integer, :number, :null]
  non_numeric.empty?
end
has_single_type?(field) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 79
# True when exactly one type other than :null has been observed for +field+.
#
# :null observations are ignored, so a field seen only as :null (or never
# seen) yields false.
def has_single_type?(field)
  non_null = type_list(field) - [:null]
  non_null.size == 1
end
increment_sample() click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 37
# Adds one to the sample counter and returns the updated count.
def increment_sample
  @sample_count = @sample_count + 1
end
increment_total() click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 33
# Adds one to the row counter and returns the updated count.
def increment_total
  @row_count = @row_count + 1
end
insert(field_name, value) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 41
# Records one observed value for +field_name+.
#
# The value's type comes from Dataset::Field.type? (defined elsewhere). With
# type hints enabled, the field's current hint is passed along and is then
# replaced by the detected type; with hints disabled, detection runs without
# a hint and the stored hint is left untouched.
#
# Returns the updated tally for the detected type.
# NOTE(review): assumes field_name was given to the constructor and the
# detected type is one of the pre-seeded tally keys — otherwise the increment
# raises NoMethodError on nil. Confirm against Dataset::Field.type?.
def insert(field_name, value)
  detected =
    if use_type_hints
      hinted = Dataset::Field.type?(value, type_hints[field_name])
      @type_hints[field_name] = hinted # remember the latest type as the next hint
      hinted
    else
      Dataset::Field.type?(value)
    end

  tally = @types[field_name]
  tally[detected] += 1
end
type?(field) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 59
# Resolves the single type to report for +field+ from its observed tallies.
#
# Resolution order:
#   1. exactly one non-null type observed -> that type
#   2. only :integer / :number / :null observed -> :number
#   3. anything else -> :string
#
# Because #has_only_numeric_types? is true for an empty set, a field with no
# observations, or with only :null observations, resolves to :number.
#
# Returns a type Symbol.
def type?(field)
  if has_single_type?(field)
    # #has_single_type? ignores :null, so strip it here as well. Taking the
    # first entry of the unfiltered list would report :null whenever :null
    # precedes the real type in the tally's key order.
    (type_list(field) - [:null]).first
  elsif has_only_numeric_types?(field)
    :number
  else
    :string
  end
end
type_count(field, type) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 69
# Number of times +type+ has been tallied for +field+.
#
# Falls back to 0 when the tally holds no (or a nil/false) entry for +type+.
def type_count(field, type)
  tally = types[field]
  tally[type] || 0
end
type_list(field) click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 73
# Lists the types that have been tallied at least once for +field+.
#
# Returns an Array of type keys, in the key order of the field's tally hash.
def type_list(field)
  known_types = types[field].keys
  known_types.select { |kind| type_count(field, kind) > 0 }
end