class TableSchema::Table

Attributes

headers[R]

Public

schema[R]

Public

Public Class Methods

new(source, schema: nil, csv_options: {}) click to toggle source
# File lib/tableschema/table.rb, line 8
# Build a table over `source`.
#
# source      - passed to parse_csv: either an Array of rows or something
#               `open` accepts.
# schema      - optional schema descriptor; when given it is wrapped in a
#               TableSchema::Schema and the unique-column tracking is set up.
# csv_options - options forwarded to CSV; `headers: true` is always forced.
def initialize(source, schema: nil, csv_options: {})
  @csv_options = csv_options.merge(headers: true)
  @csv = parse_csv(source)
  @descriptor = schema
  @headers = initialize_headers
  # Always start with an empty tracker so that iterating a schema-less table
  # (e.g. `iter(cast: false)`) does not call `each` on nil.
  @unique_columns = {}
  return if @descriptor.nil?

  @schema = TableSchema::Schema.new(@descriptor)
  initialize_unique_colums
end

Public Instance Methods

infer() click to toggle source
# File lib/tableschema/table.rb, line 47
# Return the schema descriptor, inferring a schema first if none is set.
#
# When @schema is missing, one is obtained from TableSchema::Infer (built
# from the headers and the CSV reader), the unique-column tracker is
# initialised from it and the CSV reader is rewound.
def infer
  return @schema.descriptor if @schema

  @schema = TableSchema::Infer.new(@headers, @csv).schema
  initialize_unique_colums
  @csv.rewind
  @schema.descriptor
end
iter(keyed: false, cast: true, limit: nil) { |to_h| ... } click to toggle source
# File lib/tableschema/table.rb, line 19
# Iterate over the data rows, yielding each one to the block.
#
# keyed - when truthy each row is yielded as a Hash (`row.to_h`), otherwise
#         as an Array of field values (`row.fields`).
# cast  - when truthy each row is passed through `@schema.cast_row`, rebuilt
#         as a CSV::Row over @headers and checked with check_unique_fields.
# limit - maximum number of rows to yield; nil means no limit.
#
# Without a block an Enumerator over the same rows is returned.
# With a block the result of `@csv.rewind` is returned, as before.
def iter(keyed: false, cast: true, limit: nil)
  return enum_for(:iter, limit: limit, cast: cast, keyed: keyed) unless block_given?

  rewound = nil
  begin
    @csv.each_with_index do |row, i|
      break if limit && limit <= i

      if cast
        row = CSV::Row.new(@headers, @schema.cast_row(row))
        check_unique_fields(row, i)
      end
      yield(keyed ? row.to_h : row.fields)
      collect_unique_fields(row, i)
    end
  ensure
    # Rewind even when the caller's block breaks or raises; otherwise the
    # next iteration would resume from the middle of the stream.
    rewound = @csv.rewind
  end
  rewound
end
read(keyed: false, cast: true, limit: nil) click to toggle source
# File lib/tableschema/table.rb, line 42
# Collect the rows produced by `iter` into an Array.
#
# The keyed, cast and limit options are forwarded to `iter` unchanged.
def read(keyed: false, cast: true, limit: nil)
  iter(keyed: keyed, cast: cast, limit: limit).to_a
end
save(target) click to toggle source
# File lib/tableschema/table.rb, line 57
# Write the headers followed by every row from `iter` to `target` as CSV.
#
# target - path handed to CSV.open (opened in "wb" mode).
#
# Returns true.
def save(target)
  # The options must be splatted: CSV.open takes them as keyword arguments,
  # and on Ruby 3 a positional Hash is no longer converted to keywords.
  CSV.open(target, "wb", **@csv_options) do |csv|
    csv << @headers
    iter { |row| csv << row }
  end
  true
end

Private Instance Methods

array_to_csv(array) click to toggle source
# File lib/tableschema/table.rb, line 74
# Serialise an Array of rows into a single CSV string.
#
# Each row is rendered with `to_csv(row_sep: nil)` and the resulting lines
# are joined with "\r\n".
def array_to_csv(array)
  lines = array.map do |fields|
    fields.to_csv(row_sep: nil)
  end
  lines.join("\r\n")
end
check_unique_fields(row, row_number) click to toggle source
# File lib/tableschema/table.rb, line 95
# Raise if `row` repeats a value in any tracked unique column.
#
# row        - the current row; indexed by column name.
# row_number - zero-based index of the row in the current pass.
#
# Only values stored for rows before `row_number` are considered. They are
# cast with the column's schema field before being compared.
#
# Raises TableSchema::ConstraintError on a duplicate.
def check_unique_fields(row, row_number)
  @unique_columns.each do |col_name, values|
    row_value = row[col_name]
    field = @schema.get_field(col_name)
    # `first(row_number)` is empty for row 0. The former `values[0..row_number-1]`
    # became `values[0..-1]` there, i.e. the whole array, so a second pass
    # over the table compared row 0 with its own stored value.
    previous_values = values.first(row_number).map { |value| field.cast_type(value) }
    next unless previous_values.include?(row_value)

    raise TableSchema::ConstraintError, "The values for the field `#{col_name}` should be unique but value `#{row_value}` is repeated"
  end
end
collect_unique_fields(row, row_number) click to toggle source
# File lib/tableschema/table.rb, line 91
# Record this row's value for every tracked unique column.
#
# The value is stored at index `row_number` of the column's value list.
def collect_unique_fields(row, row_number)
  @unique_columns.each_pair do |column, seen|
    seen[row_number] = row[column]
  end
end
initialize_headers() click to toggle source
# File lib/tableschema/table.rb, line 78
# Read the header names from the first row of the CSV reader.
#
# The reader is rewound afterwards so later iteration starts from the top.
# Returns the keys of the first row (an empty Array when there is no row).
def initialize_headers
  first_row = @csv.first
  @csv.rewind
  first_row.to_h.keys
end
initialize_unique_colums() click to toggle source
# File lib/tableschema/table.rb, line 84
# Reset the unique-value tracker.
#
# @unique_columns becomes a Hash with one fresh, empty Array for each entry
# of `@schema.unique_headers`.
def initialize_unique_colums
  @unique_columns = {}
  unique = @schema.unique_headers
  return if unique.empty?

  unique.each { |name| @unique_columns[name] = [] }
end
parse_csv(csv) click to toggle source

Private

# File lib/tableschema/table.rb, line 69
# Build a CSV reader over the given source using @csv_options.
#
# csv - an Array of rows (serialised in memory via array_to_csv) or any
#       other value, which is handed to `open`.
#
# Returns the CSV instance.
def parse_csv(csv)
  # NOTE(review): Kernel#open runs a command when given a string starting
  # with "|"; confirm `csv` never comes from untrusted input. The handle
  # opened here is also never closed explicitly.
  io = csv.is_a?(Array) ? StringIO.new(array_to_csv(csv)) : open(csv)
  # The options must be splatted: CSV.new takes them as keyword arguments,
  # and on Ruby 3 a positional Hash is no longer converted to keywords.
  CSV.new(io, **@csv_options)
end