class RDF::Tabular::Row

Wraps each resulting row

Constants

Cell

Class for returning values

Attributes

context[R]

Context from Table with base set to table URL for expanding URI Templates @return [JSON::LD::Context]

number[R]

Row number of this row @return [Integer]

primaryKey[R]

Cells providing a unique row identifier @return [Array<Cell>]

sourceNumber[R]

Row number of this row from the original source @return [Integer]

table[R]

Table containing this row @return [Table]

titles[R]

Title(s) of this row @return [Array<RDF::Literal>]

values[R]

Row values, hashed by `name`

Public Class Methods

new(row, metadata, number, source_number, **options) click to toggle source

@param [Array<Array<String>>] row @param [Metadata] metadata for Table @param [Integer] number 1-based row number after skipped/header rows @param [Integer] source_number 1-based row number from source @param [Hash{Symbol => Object}] options ({}) @option options [Boolean] :validate check for PK/FK consistency @return [Row]

# File lib/rdf/tabular/metadata.rb, line 2036
# Wrap a single source row, parsing each cell against its column datatype.
#
# @param [Array<Array<String>>] row
# @param [Metadata] metadata for Table
# @param [Integer] number 1-based row number after skipped/header rows
# @param [Integer] source_number 1-based row number from source
# @param [Hash{Symbol => Object}] options ({})
# @option options [Boolean] :validate check for PK/FK consistency
# @return [Row]
def initialize(row, metadata, number, source_number, **options)
  @table = metadata
  @number = number
  @sourceNumber = source_number
  @values = []
  skipColumns = metadata.dialect.skipColumns.to_i

  # Duplicate the table context with base set to the table URL, so URI
  # templates expand relative to the table without mutating the shared context.
  @context = table.context.dup
  @context.base = table.url

  # Create values hash
  # SPEC CONFUSION: are values pre-or-post conversion?
  map_values = {"_row" => number, "_sourceRow" => source_number}

  columns = metadata.tableSchema.columns ||= []
  non_virtual_columns = columns.reject(&:virtual)

  # A row shorter than the non-virtual column definitions is malformed
  if row.length < non_virtual_columns.length
    raise Error, "Row #{source_number} has #{row.length} columns, expected #{non_virtual_columns.length}"
  end

  # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
  columns.each_with_index {|c, index| row[index] ||= c.null}

  row.each_with_index do |value, index|
    next if index < skipColumns

    cell_errors = []

    # create column if necessary
    columns[index - skipColumns] ||=
      Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
    column = columns[index - skipColumns]

    @values << cell = Cell.new(metadata, column, self, value)

    datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
    # Per CSVW cell parsing: unless the base is a string-like type, replace
    # each CR, LF and TAB character with a space. This must be a character
    # class — /\r\n\t/ would match only the literal three-character sequence.
    value = value.gsub(/[\r\n\t]/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
    value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
    # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
    value = column.default || '' if value.empty?

    # A separator turns the cell into a list of values
    cell_values = column.separator ? value.split(column.separator) : [value]

    cell_values = cell_values.map do |v|
      v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
      v = column.default || '' if v.empty?
      if Array(column.null).include?(v)
        nil
      else
        expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
        if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
          lit_or_errors
        else
          # Conversion failed: record the errors, fall back to a plain literal
          cell_errors += lit_or_errors
          RDF::Literal(v, language: (column.lang unless column.lang == "und"))
        end
      end
    end.compact

    # Check for required values
    if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
      cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
    end
    cell.value = (column.separator ? cell_values : cell_values.first)
    cell.errors = cell_errors

    map_values[column.name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
  end

  # Record primaryKey if validating
  @primaryKey = @values.
    select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]

  # Record any row titles
  @titles = @values.
    select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
    map(&:value)

  # Map URLs for row
  @values.each do |cell|
    mapped_values = map_values.merge(
      "_name" => CGI.unescape(cell.column.name),
      "_column" => cell.column.number,
      "_sourceColumn" => cell.column.sourceNumber
    )
    cell.set_urls(mapped_values, options[:decode_uri])
  end
end

Public Instance Methods

id() click to toggle source

Identifier for this row, as an RFC7111 fragment @return [RDF::URI]

# File lib/rdf/tabular/metadata.rb, line 2130
# Identifier for this row, as an RFC7111 fragment of the table URL
# @return [RDF::URI]
def id
  row_uri = table.url.dup
  row_uri.fragment = "row=#{sourceNumber}"
  row_uri
end
inspect() click to toggle source
# File lib/rdf/tabular/metadata.rb, line 2148
# Class name joined with the annotated-row representation, for debugging.
def inspect
  "#{self.class.name}#{to_atd.inspect}"
end
to_atd() click to toggle source

Return Annotated Row representation

# File lib/rdf/tabular/metadata.rb, line 2137
# Return Annotated Row representation
# @return [Hash{String => Object}] annotations with nil-valued entries removed
def to_atd
  annotations = {
    "@id"          => id.to_s,
    "@type"        => "Row",
    "table"        => (table.id || table.url),
    "number"       => number,
    "sourceNumber" => sourceNumber,
    "cells"        => @values.map(&:value)
  }
  annotations.reject {|_key, val| val.nil?}
end

Private Instance Methods

value_matching_datatype(value, datatype, expanded_dt, language) click to toggle source

Given a datatype specification, return a literal matching that specification, if found; otherwise an array of error strings @return [RDF::Literal, Array<String>]

# File lib/rdf/tabular/metadata.rb, line 2156
# Given a datatype specification, return a literal matching that
# specification, if found; otherwise an array of error strings.
#
# @param [String] value lexical form of the cell value
# @param [Datatype] datatype the column's datatype description
# @param [RDF::URI] expanded_dt expanded IRI of the datatype base
# @param [String] language language tag applied to string literals ("und" suppresses it)
# @return [RDF::Literal, Array<String>] matched literal, or the errors found
def value_matching_datatype(value, datatype, expanded_dt, language)
  lit, value_errors = nil, []

  format = datatype.format
  # Datatype specific constraints and conversions
  case datatype.base.to_sym
  when :decimal, :integer, :long, :int, :short, :byte,
       :nonNegativeInteger, :positiveInteger,
       :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
       :nonPositiveInteger, :negativeInteger,
       :double, :float, :number

    # Normalize representation based on numeric-specific facets
    format = case format
    when String then {"pattern" => format}
    when Hash then format
    else {}
    end

    groupChar = format["groupChar"]
    decimalChar = format["decimalChar"] || '.'
    pattern = format["pattern"]

    begin
      value = datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
    rescue UAX35::ParseError
      value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
    end

    lit = RDF::Literal(value, datatype: expanded_dt)
    # NOTE: the range facets (minimum/maximum/minInclusive/maxInclusive/
    # minExclusive/maxExclusive) are validated once for all datatypes in the
    # shared value-constraint checks below; repeating them here would report
    # each violation more than once.
  when :boolean
    if format
      # True/False determined by Y|N values
      t, f = format.to_s.split('|', 2)
      case
      when value == t
        lit = RDF::Literal::TRUE
      when value == f
        lit = RDF::Literal::FALSE
      else
        value_errors << "#{value} does not match boolean format #{format}"
      end
    else
      if %w(1 true).include?(value.downcase)
        lit = RDF::Literal::TRUE
      elsif %w(0 false).include?(value.downcase)
        lit = RDF::Literal::FALSE
      else
        value_errors << "#{value} does not match boolean"
      end
    end
  when :date, :time, :dateTime, :dateTimeStamp, :datetime
    begin
      value = datatype.parse_uax35_date(format, value)
      lit = RDF::Literal(value, datatype: expanded_dt)
    rescue UAX35::ParseError
      value_errors << "#{value} does not match format #{format}"
    end
  when :duration, :dayTimeDuration, :yearMonthDuration
    # SPEC CONFUSION: surely format also includes that for other duration types?
    re = Regexp.new(format) rescue nil
    if re.nil? || value.match(re)
      lit = RDF::Literal(value, datatype: expanded_dt)
    else
      value_errors << "#{value} does not match format #{format}"
    end
  when :hexBinary, :base64Binary
    lit = RDF::Literal.new(value, datatype: expanded_dt)
    unless lit.valid?
      value_errors << "#{value} is invalid"
      lit = RDF::Literal.new(value)
    else
      # Length facets for binary types apply to the decoded octets
      if datatype.length && lit.object.length != datatype.length
        value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
      end
      if datatype.minLength && lit.object.length < datatype.minLength
        value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
      end
      if datatype.maxLength && lit.object.length > datatype.maxLength
        value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
      end
    end
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
       :ENTITY, :ID, :IDREF, :NOTATION
    value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
  else
    # For other types, format is a regexp
    re = Regexp.new(format) rescue nil
    unless re.nil? || value.match(re)
      value_errors << "#{value} does not match format #{format}"
    end
    lit = if value_errors.empty?
      if expanded_dt == RDF::XSD.string
        # Type string will still use language
        RDF::Literal(value, language: (language unless language == "und"))
      else
        RDF::Literal(value, datatype: expanded_dt)
      end
    end
  end

  # Lexical length facets; binary types were checked above on decoded length
  unless [:hexBinary, :base64Binary].include?(datatype.base.to_sym)
    if datatype.length && value.to_s.length != datatype.length
      value_errors << "#{value} does not have length #{datatype.length}"
    end
    if datatype.minLength && value.to_s.length < datatype.minLength
      value_errors << "#{value} does not have length >= #{datatype.minLength}"
    end
    if datatype.maxLength && value.to_s.length > datatype.maxLength
      value_errors << "#{value} does not have length <= #{datatype.maxLength}"
    end
  end

  # value constraints
  value_errors << "#{value} < minimum #{datatype.minimum}"            if datatype.minimum && lit < datatype.minimum
  value_errors << "#{value} > maximum #{datatype.maximum}"            if datatype.maximum && lit > datatype.maximum
  value_errors << "#{value} < minInclusive #{datatype.minInclusive}"  if datatype.minInclusive && lit < datatype.minInclusive
  value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"  if datatype.maxInclusive && lit > datatype.maxInclusive
  value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
  value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive

  # Final value is a valid literal, or a plain literal otherwise
  value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?

  # Either return matched literal value or errors
  value_errors.empty? ? lit : value_errors
end