class DataKit::CSV::SchemaAnalysis
Attributes
fields[R]
row_count[R]
sample_count[R]
type_hints[R]
types[R]
use_type_hints[R]
Public Class Methods
new(fields, options = {})
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 12
# Sets up empty bookkeeping for the given fields.
#
# fields  - enumerable of field names; each one gets a zeroed counter per
#           entry of Dataset::Field::Types and an initial type hint of
#           :string.
# options - hash; :use_type_hints enables hinting when it is nil (or
#           absent) or exactly true, and disables it for any other value.
def initialize(fields, options = {})
  @fields = fields
  @types = {}
  @type_hints = {}
  @row_count = 0
  @sample_count = 0

  # Hinting is on by default; only a non-nil value other than true turns
  # it off.
  hint_option = options[:use_type_hints]
  @use_type_hints = hint_option.nil? || hint_option == true

  fields.each do |field_name|
    @type_hints[field_name] = :string
    @types[field_name] = Dataset::Field::Types.each_with_object({}) do |type, counters|
      counters[type] = 0
    end
  end
end
Public Instance Methods
field_types()
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 52
# Builds a hash that maps every entry of +fields+ to the result of
# type?(field_name) for that entry.
def field_types
  fields.each_with_object({}) do |field_name, resolved|
    resolved[field_name] = type?(field_name)
  end
end
has_only_numeric_types?(field)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 83
# True when every type returned by type_list(field) is :integer, :number
# or :null. An empty type list also yields true.
def has_only_numeric_types?(field)
  numeric_or_null = [:integer, :number, :null]
  type_list(field).all? { |seen| numeric_or_null.include?(seen) }
end
has_single_type?(field)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 79
# True when type_list(field), with :null removed, contains exactly one
# type.
def has_single_type?(field)
  non_null_types = type_list(field) - [:null]
  non_null_types.size == 1
end
increment_sample()
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 37
# Adds one to @sample_count and returns the updated value.
def increment_sample
  @sample_count = @sample_count + 1
end
increment_total()
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 33
# Adds one to @row_count and returns the updated value.
def increment_total
  @row_count = @row_count + 1
end
insert(field_name, value)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 41
# Records one observed value for +field_name+ by bumping the counter of
# the type reported by Dataset::Field.type?.
#
# With type hints enabled, the field's current hint is passed along to
# Dataset::Field.type? and the reported type becomes the field's new hint.
# NOTE(review): Dataset::Field.type? is defined outside this listing; how
# it uses the hint should be confirmed there.
#
# Returns the updated counter value.
def insert(field_name, value)
  detected =
    if use_type_hints
      Dataset::Field.type?(value, type_hints[field_name]).tap do |hinted|
        # Remember the most recent type as the hint for the next value.
        @type_hints[field_name] = hinted
      end
    else
      Dataset::Field.type?(value)
    end

  @types[field_name][detected] += 1
end
type?(field)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 59
# Resolves the overall type for +field+ from the types observed so far.
#
# Returns the single non-null observed type when has_single_type? holds,
# :number when has_only_numeric_types? holds, and :string otherwise.
def type?(field)
  if has_single_type?(field)
    # has_single_type? ignores :null, so type_list may still contain :null
    # next to the one real type. Drop it before picking, otherwise the
    # answer would depend on the key order of the counter hash.
    (type_list(field) - [:null]).first
  elsif has_only_numeric_types?(field)
    :number
  else
    :string
  end
end
type_count(field, type)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 69
# Returns the counter stored for +type+ under +field+ in +types+, or 0
# when no counter is stored for that type.
def type_count(field, type)
  recorded = types[field][type]
  recorded ? recorded : 0
end
type_list(field)
click to toggle source
# File lib/data_kit/csv/schema_analysis.rb, line 73
# Returns the types tracked for +field+ whose type_count is greater than
# zero, in the key order of the field's counter hash.
def type_list(field)
  tracked_types = types[field].keys
  tracked_types.select { |candidate| type_count(field, candidate) > 0 }
end