class CSVPP::Format

Attributes

col_sep[R]
description[R]
multiline_start[R]
name[R]
skip[R]
vars[R]
vars_grouped_by_line[R]

Public Class Methods

load(path) click to toggle source

@param path [String] path to format file @return [Format]

# File lib/csvpp/format.rb, line 7
# Loads a format definition from a file.
#
# The file contents are read in full and handed to load_from_str.
#
# @param path [String, Format] path to the format file; a Format instance
#   is returned unchanged
# @return [Format]
def self.load(path)
  if path.is_a?(Format)
    path
  else
    contents = File.read(path)
    load_from_str(contents)
  end
end
load_from_str(json) click to toggle source

@param json [String] @return [Format]

# File lib/csvpp/format.rb, line 14
# Builds a Format from a JSON string.
#
# NOTE(review): parsing is delegated to Oj.load without an explicit mode;
# confirm the effective default mode is acceptable if definitions can come
# from untrusted sources.
#
# @param json [String, Format] JSON text describing the format; a Format
#   instance is returned unchanged
# @return [Format]
def self.load_from_str(json)
  if json.is_a?(Format)
    json
  else
    definition = Oj.load(json)
    new(definition)
  end
end
new(format) click to toggle source

@param format [Hash]

# File lib/csvpp/format.rb, line 20
# Builds a format from its parsed definition.
#
# Keys read from the definition: 'name', 'description', 'multiline',
# 'column_separator', 'skip' and 'vars'; 'start' is read only for multiline
# formats.
#
# @param format [Hash] parsed format definition
# @raise [KeyError] if 'vars' is missing, or if 'start' is missing for a
#   multiline format
def initialize(format)
  @name, @description = format.values_at('name', 'description')

  # The flag is compared as text, so both true and strings like " True " count.
  multiline_flag = format['multiline'].to_s.strip.downcase
  @multiline = (multiline_flag == 'true')

  @col_sep = format['column_separator']
  @skip = format['skip'].to_i # a missing 'skip' becomes 0
  @vars = format.fetch('vars')

  if multiline?
    # Map each line id to the names of the variables declared on that line.
    by_line = vars.group_by { |_name, meta| meta['line'] }
    @vars_grouped_by_line = by_line.transform_values { |pairs| pairs.map(&:first) }

    @multiline_start = format.fetch('start')
  end

  # Memo for zero-based indices: formats declare 1-based, human-readable
  # positions. The cache only pays off when parsing large files (30k+ lines).
  # See #index(var).
  @indices = {}
end

Public Instance Methods

false_values(var) click to toggle source

Returns the values that are defined as `false` in the format's JSON definition for the given variable. @return [Array] all values that should be interpreted as `false` for this variable

# File lib/csvpp/format.rb, line 81
# Values that should be read as `false` for the given variable.
#
# Only variables whose type is "boolean" can have such values; for every
# other type the result is empty.
#
# @param var [String] name of the variable
# @return [Array] the variable's 'false_values' attribute as an array
def false_values(var)
  if type(var) == 'boolean'
    array_from(var, 'false_values')
  else
    []
  end
end
index(var) click to toggle source
# File lib/csvpp/format.rb, line 52
# Zero-based index of the given variable: its declared position minus one.
#
# Results are memoized in @indices so repeated lookups skip the computation.
#
# @param var [String] name of the variable
# @return [Integer]
def index(var)
  cached = @indices[var]
  return cached if cached

  @indices[var] = position(var) - 1
end
length() click to toggle source
# File lib/csvpp/format.rb, line 48
# Number of variables in this format.
#
# @return [Integer] how many names var_names returns
def length
  names = var_names
  names.size
end
missings(var) click to toggle source

@param var [String]: name of the variable for which the missings are required @return [Array] an array of missing values (can be empty if no missings were defined)

# File lib/csvpp/format.rb, line 66
# Values that mark a missing entry for the given variable.
#
# Delegates to array_from with the 'missings' attribute.
#
# @param var [String] name of the variable
# @return [Array] the declared missing values (empty if none were defined)
def missings(var)
  array_from(var, 'missings')
end
multiline?() click to toggle source
# File lib/csvpp/format.rb, line 94
# Whether this format was declared as multiline.
#
# @return [Boolean] the flag computed in the constructor from the
#   definition's 'multiline' value
def multiline?
  @multiline
end
multiline_start?(line_id) click to toggle source
# File lib/csvpp/format.rb, line 90
# Whether the given line id equals this format's multiline start id.
#
# @param line_id [Object] line identifier to compare
# @return [Boolean] result of comparing multiline_start with line_id via ==
def multiline_start?(line_id)
  start_id = multiline_start
  start_id == line_id
end
position(var) click to toggle source
# File lib/csvpp/format.rb, line 56
# Declared position of the given variable, as written in the format.
#
# @param var [String] name of the variable
# @return [Object, nil] the variable's 'position' attribute
# @raise [KeyError] if the variable is not part of this format
def position(var)
  meta = vars.fetch(var)
  meta['position']
end
to_s() click to toggle source
# File lib/csvpp/format.rb, line 98
def to_s
  "#{name.ljust(30)}\t| #{description}"
end
true_values(var) click to toggle source

Returns the values that are defined as `true` in the format's JSON definition for the given variable. @return [Array] all values that should be interpreted as `true` for this variable

# File lib/csvpp/format.rb, line 73
# Values that should be read as `true` for the given variable.
#
# Only variables whose type is "boolean" can have such values; for every
# other type the result is empty.
#
# @param var [String] name of the variable
# @return [Array] the variable's 'true_values' attribute as an array
def true_values(var)
  if type(var) == 'boolean'
    array_from(var, 'true_values')
  else
    []
  end
end
type(var) click to toggle source
# File lib/csvpp/format.rb, line 60
# Declared type of the given variable.
#
# @param var [String] name of the variable
# @return [Object, nil] the variable's 'type' attribute (e.g. "boolean")
# @raise [KeyError] if the variable is not part of this format
def type(var)
  meta = vars.fetch(var)
  meta['type']
end
var_names() click to toggle source
# File lib/csvpp/format.rb, line 44
# Names of all variables in this format.
#
# @return [Array] the keys of vars
def var_names
  vars.keys
end
vars_for_line(line_id) click to toggle source
# File lib/csvpp/format.rb, line 86
# Names of the variables declared on the given line of a multiline format.
#
# The per-line grouping is only built for multiline formats.
#
# @param line_id [Object] line identifier as used in the variables' 'line'
#   attribute
# @return [Array] variable names declared for that line
# @raise [KeyError] if no variable is declared for line_id
def vars_for_line(line_id)
  by_line = vars_grouped_by_line
  by_line.fetch(line_id)
end

Private Instance Methods

array_from(var, attribute) click to toggle source

Returns the value or values specified for the given attribute of the given variable in the format's JSON. Returns an empty array if no such attribute was defined for the given variable. @return [Array] value(s) defined for given attribute for given variable

# File lib/csvpp/format.rb, line 108
# Reads one attribute of a variable and always hands it back as an array.
#
# @param var [String] name of the variable
# @param attribute [String] attribute key within the variable's definition
# @return [Array] [] when the attribute is absent, the value itself when it
#   already is an array, otherwise the single value wrapped in an array
# @raise [KeyError] if the variable is not part of this format
def array_from(var, attribute)
  declared = vars.fetch(var)[attribute]

  case declared
  when nil then []
  when Array then declared
  else [declared]
  end
end