class RDF::Tabular::Row
Wraps each resulting row
Constants
- Cell: Class for returning values
Attributes
- context: Context from Table with base set to table URL for expanding URI Templates. @return [JSON::LD::Context]
- number: Row number of this row. @return [Integer]
- primaryKey: Cells providing a unique row identifier. @return [Array<Cell>]
- sourceNumber: Row number of this row from the original source. @return [Integer]
- table: Table containing this row. @return [Table]
- titles: Title(s) of this row. @return [Array<RDF::Literal>]
- values: Row values, hashed by `name`
Public Class Methods
@param [Array<String>] row
@param [Metadata] metadata for Table
@param [Integer] number 1-based row number after skipped/header rows
@param [Integer] source_number 1-based row number from source
@param [Hash{Symbol => Object}] options ({})
@option options [Boolean] :validate check for PK/FK consistency
@return [Row]
# File lib/rdf/tabular/metadata.rb, line 2036
def initialize(row, metadata, number, source_number, **options)
  @table = metadata
  @number = number
  @sourceNumber = source_number
  @values = []
  skipColumns = metadata.dialect.skipColumns.to_i

  @context = table.context.dup
  @context.base = table.url

  # Create values hash
  # SPEC CONFUSION: are values pre-or-post conversion?
  map_values = {"_row" => number, "_sourceRow" => source_number}

  columns = metadata.tableSchema.columns ||= []
  non_virtual_columns = columns.reject(&:virtual)

  if row.length < non_virtual_columns.length
    raise Error, "Row #{source_number} has #{row.length} columns, expected #{non_virtual_columns.length}"
  end

  # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
  columns.each_with_index {|c, index| row[index] ||= c.null}

  row.each_with_index do |value, index|
    next if index < skipColumns
    cell_errors = []

    # create column if necessary
    columns[index - skipColumns] ||= Column.new({}, **options.merge(
      table: metadata,
      parent: metadata.tableSchema,
      number: index + 1 - skipColumns))
    column = columns[index - skipColumns]

    @values << cell = Cell.new(metadata, column, self, value)

    datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
    value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
    value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
    # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
    value = column.default || '' if value.empty?

    cell_values = column.separator ? value.split(column.separator) : [value]

    cell_values = cell_values.map do |v|
      v = v.strip unless %w(string anyAtomicType).include?(datatype.base)
      v = column.default || '' if v.empty?
      if Array(column.null).include?(v)
        nil
      else
        expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
        if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
          lit_or_errors
        else
          cell_errors += lit_or_errors
          RDF::Literal(v, language: (column.lang unless column.lang == "und"))
        end
      end
    end.compact

    # Check for required values
    if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
      cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
    end

    cell.value = (column.separator ? cell_values : cell_values.first)
    cell.errors = cell_errors

    map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
  end

  # Record primaryKey if validating
  @primaryKey = @values.
    select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]

  # Record any row titles
  @titles = @values.
    select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
    map(&:value)

  # Map URLs for row
  @values.each_with_index do |cell, index|
    mapped_values = map_values.merge(
      "_name" => CGI.unescape(cell.column.name),
      "_column" => cell.column.number,
      "_sourceColumn" => cell.column.sourceNumber
    )
    cell.set_urls(mapped_values, options[:decode_uri])
  end
end
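Rows are normally constructed internally while a table is parsed, but the constructor can also be called directly. A minimal sketch, assuming `table` is an already-loaded RDF::Tabular::Table metadata object whose tableSchema defines two columns; the cell values and row numbers below are illustrative only:

  # Hypothetical: `table` is Table metadata with a two-column schema.
  # ["AD", "Andorra"] is the first data row, i.e. row 1 after the header
  # row and row 2 of the source file.
  row = RDF::Tabular::Row.new(["AD", "Andorra"], table, 1, 2, validate: true)

  row.number        #=> 1, row number after skipped/header rows
  row.sourceNumber  #=> 2, row number in the original source
  row.values        #=> Array of Cell objects, one per column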
Public Instance Methods
Identifier for this row, as an RFC7111 fragment. @return [RDF::URI]
# File lib/rdf/tabular/metadata.rb, line 2130
def id
  u = table.url.dup
  u.fragment = "row=#{self.sourceNumber}"
  u
end
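For example, for a hypothetical table whose url is http://example.org/countries.csv and a row with sourceNumber 2:

  row.id  #=> RDF::URI("http://example.org/countries.csv#row=2")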
# File lib/rdf/tabular/metadata.rb, line 2148
def inspect
  self.class.name + to_atd.inspect
end
Return Annotated Row representation
# File lib/rdf/tabular/metadata.rb, line 2137
def to_atd
  {
    "@id" => id.to_s,
    "@type" => "Row",
    "table" => (table.id || table.url),
    "number" => self.number,
    "sourceNumber" => self.sourceNumber,
    "cells" => @values.map(&:value)
  }.delete_if {|k, v| v.nil?}
end
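A sketch of the returned hash for the same hypothetical two-column row; the URLs and cell values are illustrative only, and the "table" entry is the table id if one is defined, otherwise the table url:

  row.to_atd
  #=> {
  #     "@id"          => "http://example.org/countries.csv#row=2",
  #     "@type"        => "Row",
  #     "table"        => "http://example.org/countries.csv",
  #     "number"       => 1,
  #     "sourceNumber" => 2,
  #     "cells"        => [...]   # one entry per Cell value set in the constructor
  #   }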
Private Instance Methods
Given a datatype specification, return a literal matching that specification if one can be constructed; otherwise return the accumulated error messages. @return [RDF::Literal, Array<String>]
# File lib/rdf/tabular/metadata.rb, line 2156
def value_matching_datatype(value, datatype, expanded_dt, language)
  lit, value_errors = nil, []
  original_value = value.dup
  format = datatype.format

  # Datatype specific constraints and conversions
  case datatype.base.to_sym
  when :decimal, :integer, :long, :int, :short, :byte,
       :nonNegativeInteger, :positiveInteger,
       :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
       :nonPositiveInteger, :negativeInteger,
       :double, :float, :number
    # Normalize representation based on numeric-specific facets
    format = case format
    when String then {"pattern" => format}
    when Hash then format
    else {}
    end
    groupChar = format["groupChar"]
    decimalChar = format["decimalChar"] || '.'
    pattern = format["pattern"]

    begin
      value = datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
    rescue UAX35::ParseError
      value_errors << "#{value} does not match numeric pattern #{pattern ? pattern.inspect : 'default'}"
    end

    lit = RDF::Literal(value, datatype: expanded_dt)
    if !lit.plain? && datatype.minimum && lit < datatype.minimum
      value_errors << "#{value} < minimum #{datatype.minimum}"
    end
    case
    when datatype.minimum && lit < datatype.minimum
      value_errors << "#{value} < minimum #{datatype.minimum}"
    when datatype.maximum && lit > datatype.maximum
      value_errors << "#{value} > maximum #{datatype.maximum}"
    when datatype.minInclusive && lit < datatype.minInclusive
      value_errors << "#{value} < minInclusive #{datatype.minInclusive}"
    when datatype.maxInclusive && lit > datatype.maxInclusive
      value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"
    when datatype.minExclusive && lit <= datatype.minExclusive
      value_errors << "#{value} <= minExclusive #{datatype.minExclusive}"
    when datatype.maxExclusive && lit >= datatype.maxExclusive
      value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}"
    end
  when :boolean
    if format
      # True/False determined by Y|N values
      t, f = format.to_s.split('|', 2)
      case
      when value == t
        lit = RDF::Literal::TRUE
      when value == f
        lit = RDF::Literal::FALSE
      else
        value_errors << "#{value} does not match boolean format #{format}"
      end
    else
      if %w(1 true).include?(value.downcase)
        lit = RDF::Literal::TRUE
      elsif %w(0 false).include?(value.downcase)
        lit = RDF::Literal::FALSE
      else
        value_errors << "#{value} does not match boolean"
      end
    end
  when :date, :time, :dateTime, :dateTimeStamp, :datetime
    begin
      value = datatype.parse_uax35_date(format, value)
      lit = RDF::Literal(value, datatype: expanded_dt)
    rescue UAX35::ParseError
      value_errors << "#{value} does not match format #{format}"
    end
  when :duration, :dayTimeDuration, :yearMonthDuration
    # SPEC CONFUSION: surely format also includes that for other duration types?
    re = Regexp.new(format) rescue nil
    if re.nil? || value.match(re)
      lit = RDF::Literal(value, datatype: expanded_dt)
    else
      value_errors << "#{value} does not match format #{format}"
    end
  when :hexBinary, :base64Binary
    lit = RDF::Literal.new(value, datatype: expanded_dt)
    unless lit.valid?
      value_errors << "#{value} is invalid"
      lit = RDF::Literal.new(value)
    else
      if datatype.length && lit.object.length != datatype.length
        value_errors << "decoded #{value} has length #{lit.object.length} not #{datatype.length}"
      end
      if datatype.minLength && lit.object.length < datatype.minLength
        value_errors << "decoded #{value} has length #{lit.object.length} not >= #{datatype.minLength}"
      end
      if datatype.maxLength && lit.object.length > datatype.maxLength
        value_errors << "decoded #{value} has length #{lit.object.length} not <= #{datatype.maxLength}"
      end
    end
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
       :ENTITY, :ID, :IDREF, :NOTATION
    value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
  else
    # For other types, format is a regexp
    re = Regexp.new(format) rescue nil
    unless re.nil? || value.match(re)
      value_errors << "#{value} does not match format #{format}"
    end

    lit = if value_errors.empty?
      if expanded_dt == RDF::XSD.string
        # Type string will still use language
        RDF::Literal(value, language: (language unless language == "und"))
      else
        RDF::Literal(value, datatype: expanded_dt)
      end
    end
  end

  # Length constraints (binary types are checked on their decoded values above)
  if datatype.length && value.to_s.length != datatype.length && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
    value_errors << "#{value} does not have length #{datatype.length}"
  end
  if datatype.minLength && value.to_s.length < datatype.minLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
    value_errors << "#{value} does not have length >= #{datatype.minLength}"
  end
  if datatype.maxLength && value.to_s.length > datatype.maxLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
    value_errors << "#{value} does not have length <= #{datatype.maxLength}"
  end

  # value constraints
  value_errors << "#{value} < minimum #{datatype.minimum}" if datatype.minimum && lit < datatype.minimum
  value_errors << "#{value} > maximum #{datatype.maximum}" if datatype.maximum && lit > datatype.maximum
  value_errors << "#{value} < minInclusive #{datatype.minInclusive}" if datatype.minInclusive && lit < datatype.minInclusive
  value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}" if datatype.maxInclusive && lit > datatype.maxInclusive
  value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
  value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive

  # Final value is a valid literal, or a plain literal otherwise
  value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?

  # Either return matched literal value or errors
  value_errors.empty? ? lit : value_errors
end
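The numeric branch above relies on the datatype's UAX 35 number parsing (parse_uax35_number) before building a typed literal. The following stand-alone sketch shows the same idea in simplified form (drop the group character, swap the decimal character, construct an xsd:decimal literal); it is an illustration only, not the library's actual implementation, and the normalize_number helper is hypothetical:

  require 'rdf'

  # Simplified illustration: normalize "1.234.567,89", written with
  # groupChar "." and decimalChar ",", into a canonical xsd:decimal literal.
  def normalize_number(value, group_char: ",", decimal_char: ".")
    normalized = value.delete(group_char).sub(decimal_char, ".")
    RDF::Literal(normalized, datatype: RDF::XSD.decimal)
  end

  lit = normalize_number("1.234.567,89", group_char: ".", decimal_char: ",")
  lit.valid?  #=> true
  lit.to_s    #=> "1234567.89"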