class TableSchema::Infer
Attributes
schema[R]
Public Class Methods
new(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
click to toggle source
# File lib/tableschema/infer.rb, line 11
# Stores the inference inputs, seeds the schema with the fields built from
# the headers, and then runs inference straight away.
#
# @param headers     column names; one field is built per header
# @param rows        data rows examined during inference
# @param explicit    forwarded to the field builder, which checks it with `=== true`
# @param primary_key when truthy, recorded in the schema under :primaryKey
# @param row_limit   optional cap on row scanning, consulted by infer!
def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
  @headers, @rows = headers, rows
  @explicit, @primary_key, @row_limit = explicit, primary_key, row_limit

  # `fields` reads the instance variables assigned above, so it must run
  # only after they are in place.
  @schema = { fields: fields }
  @schema[:primaryKey] = primary_key if primary_key

  infer!
end
Public Instance Methods
available_types()
click to toggle source
# File lib/tableschema/infer.rb, line 128
# Names of the types considered during inference. guess_type walks this list
# in reverse, so entries nearer the end are tried first.
#
# @return [Array<String>] a newly built array on every call
def available_types
  %w[
    any string boolean number integer date
    time datetime array object geopoint geojson
  ]
end
fields()
click to toggle source
# File lib/tableschema/infer.rb, line 25
# Builds one TableSchema::Field per header, in header order.
#
# Every descriptor carries the header as :name plus empty :title and
# :description strings. Two constraints are computed for each header:
# :required (the result of `@explicit === true`) and :unique (whether the
# header equals @primary_key). Unless the explicit check holds, constraints
# whose value is false are dropped; :constraints is attached to the
# descriptor only when at least one constraint is left.
#
# @return [Array<TableSchema::Field>]
def fields
  @headers.map do |header|
    explicit = (@explicit === true)
    constraints = { required: explicit, unique: (header == @primary_key) }
    constraints.delete_if { |_name, flag| flag == false } unless explicit

    descriptor = { name: header, title: '', description: '' }
    descriptor[:constraints] = constraints unless constraints.empty?
    TableSchema::Field.new(descriptor)
  end
end
guess_format(converter, col)
click to toggle source
# File lib/tableschema/infer.rb, line 89
# Picks a format for a value by trying the converter's `cast_*` methods.
#
# Every instance method of the converter's class whose name contains "cast_"
# is a candidate; the format name is the method name with the first "cast_"
# removed. The candidate for the default format is skipped. The first
# candidate that can be called with the value without raising
# TableSchema::Exception wins.
#
# @param converter object whose class supplies the cast_* methods
# @param col       the cell value handed to each cast method
# @return [String] the winning format name, or TableSchema::DEFAULTS[:format]
#   when no candidate accepts the value
def guess_format(converter, col)
  default_format = TableSchema::DEFAULTS[:format]
  casters = converter.class.instance_methods.grep(/cast_/)

  casters.each do |caster|
    candidate = caster.to_s.sub('cast_', '')
    next if candidate == default_format

    begin
      converter.send(caster, col)
      return candidate
    rescue TableSchema::Exception
      # This format rejected the value; move on to the next candidate.
      next
    end
  end

  default_format
end
guess_type(col, index)
click to toggle source
# File lib/tableschema/infer.rb, line 69
# Guesses the type and format of a single cell.
#
# The available types are tried from last to first. For each one a converter
# is instantiated (class name obtained from get_class_for_type, resolved via
# Kernel.const_get) with the field at the given index; the first converter
# whose `test` returns true decides the type, and guess_format decides the
# format.
#
# @param col   the cell value
# @param index position of the cell's field within @schema[:fields]
# @return [Hash] `{ type:, format: }`; both fall back to TableSchema::DEFAULTS
#   when no converter accepts the value
def guess_type(col, index)
  available_types.reverse_each do |candidate|
    converter_class = Kernel.const_get(get_class_for_type(candidate))
    converter = converter_class.new(@schema[:fields][index])
    next unless converter.test(col) === true

    return { type: candidate, format: guess_format(converter, col) }
  end

  { type: TableSchema::DEFAULTS[:type], format: TableSchema::DEFAULTS[:format] }
end
infer!()
click to toggle source
# File lib/tableschema/infer.rb, line 42
# Scans the rows, collects a type/format guess for every cell, resolves one
# guess per column, and replaces @schema with a TableSchema::Schema built
# from the resulting descriptor.
#
# Rows are normalised to the header count first: surplus cells are dropped
# and missing cells are filled with empty strings. The caller's row objects
# are never modified. When @row_limit is set, at most that many rows are
# examined.
#
# @return [TableSchema::Schema] the schema built from the inferred descriptor
def infer!
  type_matches = []
  headers_length = @headers.count

  @rows.each_with_index do |row, index|
    # index is zero-based, so `>=` stops after exactly @row_limit rows.
    break if @row_limit && index >= @row_limit

    row = row.fields if row.is_a?(CSV::Row)
    row_length = row.count

    if row_length > headers_length
      # Exclusive range: keep exactly one cell per header.
      row = row[0...headers_length]
    elsif row_length < headers_length
      # `+` builds a new array, leaving the caller's row untouched.
      row = row + [''] * (headers_length - row_length)
    end

    row.each_with_index do |col, idx|
      (type_matches[idx] ||= []) << guess_type(col, idx)
    end
  end

  resolve_types(type_matches)
  @schema = TableSchema::Schema.new(@schema)
end
resolve_types(results)
click to toggle source
# File lib/tableschema/infer.rb, line 106
# Chooses one guess per column and merges it into the matching field.
#
# For each column the most frequent guess wins; when several guesses are
# equally frequent, the one that appeared first is kept. Columns without any
# guesses are left untouched. The guess lists themselves are not modified.
#
# @param results [Array<Array<Hash>>] per-column lists of guesses, indexed
#   like @schema[:fields]
# @return [Array<Array<Hash>>] the results argument
def resolve_types(results)
  results.each_with_index do |guesses, index|
    next if guesses.nil? || guesses.empty?

    # Count on the full list; de-duplicating first would make every count 1.
    counts = Hash.new(0)
    guesses.each { |guess| counts[guess] += 1 }

    winner = nil
    best = 0
    counts.each do |guess, count|
      # Strict comparison keeps the earliest guess on ties.
      winner, best = guess, count if count > best
    end

    @schema[:fields][index].merge!(winner)
  end
end