class RDF::Tabular::Metadata

Constants

DATATYPES

Valid datatypes

INHERITED_DEFAULTS
INHERITED_PROPERTIES

Inherited properties, valid for all types

LOCAL_CONTEXT

Local version of the context @return [JSON::LD::Context]

NAME_SYNTAX

A name is restricted according to the following RegExp. @return [RegExp]

Attributes

filenames[R]

Filename(s) (URI) of opened metadata, if any May be plural when merged @return [Array<RDF::URI>] filenames

id[R]

ID of this Metadata @return [RDF::URI]

object[RW]

Hash representation @return [Hash<Symbol,Object>]

parent[R]

Parent of this Metadata (TableGroup for Table, …) @return [Metadata]

url[R]

URL of related resource @return [RDF::URI]

Public Class Methods

for_input(input, **options) click to toggle source

Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file @param [IO, StringIO] input @param [Hash{Symbol => Object}] options @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location @option options [RDF::URI] :base

The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.

@return [Metadata]

# File lib/rdf/tabular/metadata.rb, line 178
# Find the metadata describing an input file.
#
# Sources are consulted in order, stopping at the first that yields metadata:
# 1. `options[:metadata]` (a Metadata, a Hash, or a location to open),
# 2. a `describedby` link exposed through `input.links`,
# 3. locations expanded from the site-wide configuration templates.
# Linked and found metadata are only used when they describe `base`.
# When nothing is found, an empty TableGroup referencing `base` is created.
#
# @param [IO, StringIO] input
# @param [Hash{Symbol => Object}] options
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata
# @option options [RDF::URI] :base base URL of the input
# @option options [Boolean] :decode_uri decode %-encodings of expanded templates
# @return [Metadata] always a TableGroup (converted with `to_table_group` if needed)
def self.for_input(input, **options)
  base = options[:base]
  supplied = options[:metadata]

  # 1. User metadata, if provided
  metadata = case supplied
  when Metadata
    supplied
  when Hash
    Metadata.new(supplied, **options.merge(reason: "load user metadata: #{supplied.inspect}"))
  when String, RDF::URI
    Metadata.open(supplied, **options.merge(filenames: supplied, reason: "load user metadata: #{supplied.inspect}"))
  end

  # 2. Linked metadata, if the input exposes a describedby link
  link = (input.links.find_link(%w(rel describedby)) if !metadata && input.respond_to?(:links))
  if link
    link_loc = RDF::URI(base).join(link.href).to_s
    linked = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
    if linked && linked.describes_file?(base)
      # Metadata must describe file to be useful
      metadata = linked
    elsif linked
      log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
    end
  end

  # 3. Site-wide configuration: expand each template into a candidate location
  if !metadata && base
    templates = site_wide_config(base)
    log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
    locs = templates.map do |template|
      expanded = Addressable::Template.new(template).expand(url: base).to_s
      expanded = RDF::URI.decode(expanded) if options[:decode_uri]
      RDF::URI(base).join(expanded)
    end
    log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}

    locs.each do |loc|
      # Once a candidate has been accepted the remaining locations are skipped
      next if metadata

      metadata = begin
        found = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
        if !found
          nil
        elsif found.describes_file?(base)
          # Metadata must describe file to be useful
          found
        else
          log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
          nil
        end
      rescue IOError
        log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
        nil
      end
    end
  end

  # Fall back to an empty TableGroup when nothing was located
  unless metadata
    metadata = if base
      TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
    else
      TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
    end
  end

  # Make TableGroup, if not already
  return metadata if metadata.is_a?(TableGroup)
  metadata.to_table_group
end
new(input, **options) click to toggle source

@private

# File lib/rdf/tabular/metadata.rb, line 253
# Instantiate the appropriate Metadata subclass for `input`.
#
# `input` is returned untouched when it already is a Metadata. Otherwise it is
# taken as a Hash or parsed as JSON (from an IO/StringIO or from `to_s`).
# When called on Metadata itself, the concrete class is chosen from
# `options[:type]`, then the object's `@type`, then by the properties present;
# subclasses are constructed directly.
#
# @private
# @param [Metadata, Hash, #read, #to_s] input
# @param [Hash{Symbol => Object}] options
# @raise [Error] if the input is not a JSON object or the type is unknown
# @return [Metadata]
def self.new(input, **options)
  # Trivial case: nothing to construct
  return input if input.is_a?(Metadata)

  object =
    if input.is_a?(Hash)
      input
    elsif input.is_a?(IO) || input.is_a?(StringIO)
      ::JSON.parse(input.read)
    else
      ::JSON.parse(input.to_s)
    end

  raise ::JSON::ParserError unless object.is_a?(Hash)

  # Add context, if not set (which it should be); only for top-level objects
  object['@context'] ||= options.delete(:@context) || options[:context] unless options[:parent]

  klass =
    if self.equal?(RDF::Tabular::Metadata)
      # Explicit type option wins, then @type from the object
      type = options[:type].to_sym if options[:type]
      type ||= object['@type'].to_sym if object['@type']

      # Otherwise, infer the type from the properties that are present;
      # entries are tried in order and the first match wins
      present = object.keys.map(&:to_s)
      type ||= begin
        signatures = [
          [:TableGroup, %w(tables)],
          [:Table,      %w(dialect tableSchema transformations)],
          [:Template,   %w(targetFormat scriptFormat source)],
          [:Schema,     %w(columns primaryKey foreignKeys rowTitles)],
          [:Column,     %w(name virtual)],
          [:Dialect,    %w(commentPrefix delimiter doubleQuote encoding header headerRowCount)],
          [:Dialect,    %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim)]
        ]
        match = signatures.find {|_, props| !(props & present).empty?}
        match && match.first
      end

      case type.to_s.to_sym
      when :TableGroup, :"" then RDF::Tabular::TableGroup
      when :Table then RDF::Tabular::Table
      when :Template then RDF::Tabular::Transformation
      when :Schema then RDF::Tabular::Schema
      when :Column then RDF::Tabular::Column
      when :Dialect then RDF::Tabular::Dialect
      else
        raise Error, "Unknown metadata type: #{type.inspect}"
      end
    else
      # subclasses can be directly constructed without type dispatch
      self
    end

  # Bypass this overridden .new by allocating and initializing by hand
  md = klass.allocate
  md.send(:initialize, object, **options)
  md
rescue ::JSON::ParserError
  raise Error, "Expected input to be a JSON Object"
end
new(input, **options) click to toggle source

Create Metadata from IO, Hash or String

@param [Metadata, Hash, read] input @param [Hash{Symbol => Object}] options @option options [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect] :type

Type of schema, if not set, intuited from properties

@option options [JSON::LD::Context] context

Context used for this metadata. Taken from input if not provided

@option options [RDF::URI] :base

The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.

@option options [Boolean] :decode_uri

Decode %-encodings in the result of a URI Template operation.

@option options [Boolean] :normalize normalize the object @option options [Boolean] :validate Strict metadata validation @raise [Error] @return [Metadata]

# File lib/rdf/tabular/metadata.rb, line 327
# Create Metadata from an already-parsed Hash (parsing happens in `.new`).
#
# Establishes the context (a copy of LOCAL_CONTEXT with its base set), resolves
# `url` and `@id` against the base, routes known properties through their
# setters, and stores any other key directly in the object.
#
# @param [Hash] input parsed metadata object
# @param [Hash{Symbol => Object}] options
# @option options [Metadata] :parent parent of this Metadata, if any
# @option options [RDF::URI] :base base URL; falls back to `input.base_uri` or `input.filename`
# @option options [Array, String] :filenames location(s) the metadata was loaded from
# @option options [Boolean] :normalize normalize the object and drop its context
# @option options [String] :reason message logged at debug level once initialized
# @raise [Error] if an `@context` object has properties other than @base and @language
def initialize(input, **options)
  @options = options.dup

  # Parent of this Metadata, if any
  @parent = @options[:parent]

  # Get context from input
  # Optimize by using built-in version of context, and just extract @base, @lang
  opt_base = @options[:base]
  opt_base ||= input.base_uri if input.respond_to?(:base_uri)
  opt_base ||= input.filename if input.respond_to?(:filename)

  # Fresh copy of the built-in context with its base set to opt_base
  local_context = lambda do
    LOCAL_CONTEXT.dup.tap {|ctx| ctx.base = RDF::URI(opt_base)}
  end

  @context = case input['@context']
  when Array
    log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
    c = local_context.call
    # Only the first embedded object is considered
    obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
    raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
    c.parse(obj)
  when Hash
    log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
    local_context.call.parse(input['@context'])
  when "http://www.w3.org/ns/csvw"
    local_context.call
  else
    # Only a TableGroup, or a Table without a parent, gets a default context;
    # everything else is left without one (see #context for the parent fallback)
    if self.is_a?(TableGroup) || (self.is_a?(Table) && !@parent)
      log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
      local_context.call
    end
  end

  # :reason is only used for the debug output at the end
  reason = @options.delete(:reason)

  @options[:base] = @context ? @context.base : RDF::URI(opt_base)

  if @context && @context.default_language && !BCP47.valid?(@context.default_language.to_s)
    log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
    @context.default_language = nil
  end

  @filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames]
  @properties = self.class.const_get(:PROPERTIES)
  @required = self.class.const_get(:REQUIRED)

  @object = {}

  log_depth do
    # Input was parsed in .new
    # Metadata is object with symbolic keys
    input.each do |key, value|
      key = key.to_sym
      case key
      when :url
        # URL of CSV relative to metadata
        object[:url] = value
        @url = @options[:base].join(value)
        @options[:base] = @url if @context # Use as base for expanding IRIs
      when :@id
        # metadata identifier
        object[:@id] = if value.is_a?(String)
          value
        else
          log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
          ""  # Default value
        end
        @id = @options[:base].join(object[:@id])
      else
        # Known properties go through their setter; anything else is stored as-is
        if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
          self.send("#{key}=".to_sym, value)
        else
          object[key] = value
        end
      end
    end
  end

  # Set type from @type, if present and not otherwise defined
  @type = object[:@type].to_sym if object[:@type]

  if options[:normalize]
    # If normalizing, also remove remaining @context
    self.normalize!
    @context = nil
    object.delete(:@context)
  end

  if reason
    log_debug("md#initialize") {reason}
    log_debug("md#initialize") {"filenames: #{filenames}"}
    log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
  end
end
open(path, **options) click to toggle source

Attempt to retrieve the file at the specified path. If it is valid metadata, create a new Metadata object from it, otherwise, an empty Metadata object

@param [String] path @param [Hash{Symbol => Object}] options

see `RDF::Util::File.open_file` in RDF.rb and {new}

@yield [Metadata] @raise [IOError] if file not found

# File lib/rdf/tabular/metadata.rb, line 139
# Retrieve the document at `path` and build Metadata from it.
#
# An `Accept` header for JSON-LD/JSON is added to the options, and a relative
# path is turned into a `file:` location. The resolved location is passed to
# `new` as both `:base` and `:filenames`.
#
# @param [String] path
# @param [Hash{Symbol => Object}] options passed to `RDF::Util::File.open_file` and {new}
# @raise [IOError] if the file is not found (translated from Errno::ENOENT)
# @return [Metadata] the result of the `open_file` call, whose block builds the Metadata
def self.open(path, **options)
  request_options = options.merge(headers: {'Accept' => 'application/ld+json, application/json'})
  location = RDF::URI(path).relative? ? "file:#{path}" : path

  RDF::Util::File.open_file(location, **request_options) do |file|
    self.new(file, **request_options.merge(base: location, filenames: location))
  end
rescue Errno::ENOENT => e
  raise IOError, e.message
end
site_wide_config(base) click to toggle source

Return the well-known configuration for a file, and remember it using a weak-reference cache to avoid unnecessary retrievals. @param [String] base the URL used for finding the file @return [Array<String>, false]

# File lib/rdf/tabular/metadata.rb, line 156
# Return the site-wide location templates that apply to `base`.
#
# The configuration document is located by joining SITE_WIDE_CONFIG onto
# `base`. Results are remembered per configuration location in an
# RDF::Util::Cache so the document is only requested while no entry exists.
# If loading fails, the templates in SITE_WIDE_DEFAULT are used instead.
#
# Each template is returned without its line terminator or surrounding
# whitespace, and blank lines are dropped, so loaded templates have the same
# shape as the `SITE_WIDE_DEFAULT.split` fallback.
#
# @param [String] base the URL used for finding the file
# @return [Array<String>] URI templates, one per entry
def self.site_wide_config(base)
  require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
  @cache ||= RDF::Util::Cache.new(-1)

  config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s
  # Only load if we haven't tried before. Use `SITE_WIDE_DEFAULT` if not found
  if @cache[config_loc].nil?
    @cache[config_loc] = begin
      RDF::Util::File.open_file(config_loc) do |rd|
        rd.each_line.map(&:strip).reject(&:empty?)
      end
    rescue StandardError
      # Best effort: any failure to load falls back to the defaults
      SITE_WIDE_DEFAULT.split
    end
  end
  @cache[config_loc]
end

Private Class Methods

log_debug(*args, **options, &block) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1291
# Class-level debug logging: forwards all arguments, options and the block
# to a newly created DebugContext.
def self.log_debug(*args, **options, &block)
  debug_context = DebugContext.new
  debug_context.log_debug(*args, **options, &block)
end
log_warn(*args, **options) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1294
# Class-level warning logging: forwards all arguments and options
# to a newly created DebugContext.
def self.log_warn(*args, **options)
  debug_context = DebugContext.new
  debug_context.log_warn(*args, **options)
end

Public Instance Methods

==(other) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1095
# Compare the Hash representation with `other`.
#
# A Hash is compared directly; anything responding to `object` is compared
# through that; any other value is compared as-is.
# @param [Object] other
# @return [Boolean]
def ==(other)
  comparable =
    if other.is_a?(Hash)
      other
    elsif other.respond_to?(:object)
      other.object
    else
      other
    end
  object == comparable
end
[](key) click to toggle source

Proxy to @object

# File lib/rdf/tabular/metadata.rb, line 1092
# Proxy to @object: read the value stored under `key`.
def [](key)
  object[key]
end
[]=(key, value) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1093
# Proxy to @object: store `value` under `key`.
def []=(key, value)
  object[key] = value
end
base() click to toggle source

Base URL of metadata @return [RDF::URI]

# File lib/rdf/tabular/metadata.rb, line 559
# Base URL of metadata, as recorded in the options.
# @return [RDF::URI]
def base
  @options[:base]
end
common_properties(subject, property, value, &block) click to toggle source

Return JSON-friendly or yield RDF for common properties

@overload common_properties(subject, property, value, &block)

Yield RDF statements
@param [RDF::Resource] subject
@param [String] property
@param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
@yield property, value
@yieldparam [String] property as a PName or URL
@yieldparam [RDF::Statement] statement

@overload common_properties(subject, property, value)

Return value with expanded values and node references flattened
@return [String, Hash{String => Object}, Array<String, Hash{String => Object}>] simply extracted from metadata
# File lib/rdf/tabular/metadata.rb, line 921
# Return JSON-friendly or yield RDF for common properties
#
# @overload common_properties(subject, property, value, &block)
#   Yield RDF statements
#   @param [RDF::Resource] subject
#   @param [String] property
#   @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
#   @yieldparam [RDF::Statement] statement
#
# @overload common_properties(subject, property, value)
#   Return value with expanded values and node references flattened
#   @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]
def common_properties(subject, property, value, &block)
  if block_given?
    property = context.expand_iri(property.to_s, vocab: true) unless property.is_a?(RDF::URI)
    case value
    when Array
      value.each {|v| common_properties(subject, property, v, &block)}
    when Hash
      if value['@value']
        # Value object: emit a single literal
        dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
        lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
        block.call(RDF::Statement(subject, property, lit))
      else
        # value MUST be a node object, establish a new subject from `@id`
        s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new

        # Generate a triple
        block.call(RDF::Statement(subject, property, s2))

        # Generate types
        Array(value['@type']).each do |t|
          block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
        end

        # Generate triples for all other properties
        value.each do |prop, val|
          next if prop.to_s.start_with?('@')
          common_properties(s2, prop, val, &block)
        end
      end
    else
      # Value is a primitive JSON value
      block.call(RDF::Statement(subject, property, RDF::Literal(value)))
    end
  else
    case value
    when Array
      value.map {|v| common_properties(subject, property, v)}
    when Hash
      if value['@value']
        value['@value']
      elsif value.keys == %w(@id) && value['@id']
        # A bare node reference flattens to its identifier
        value['@id']
      else
        value.each_with_object({}) do |(k, v), nv|
          nv[k.to_s] = case k.to_s
          # `v` already is the identifier here, so expand it directly
          when '@id' then context.expand_iri(v).to_s
          when '@type' then v
          else common_properties(nil, k, v)
          end
        end
      end
    else
      value
    end
  end
end
context() click to toggle source

Context used for this metadata. Use parent’s if not defined on self. @return [JSON::LD::Context]

# File lib/rdf/tabular/metadata.rb, line 462
# Context used for this metadata; falls back to the parent's context when
# none is defined on self.
# @return [JSON::LD::Context, nil] nil when there is neither a context nor a parent
def context
  return @context if @context
  parent.context if parent
end
datatype=(value) click to toggle source

Set new datatype @return [Dialect] @raise [Error] if datatype is not valid

# File lib/rdf/tabular/metadata.rb, line 539
# Set new datatype.
#
# A Hash is used as the datatype description; any other value is wrapped as
# `{base: value}`. The resulting Datatype is stored when it is valid, or
# whenever the value was given as an object (validation errors may then
# surface later). Otherwise a warning is logged and nothing is stored.
# @param [Hash, Object] value
def datatype=(value)
  description = value.is_a?(Hash) ? value : {base: value}
  candidate = Datatype.new(description, **@options.merge(parent: self))

  unless candidate.valid? || value.is_a?(Hash)
    log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
    return
  end

  # Set it if it was specified as an object, which may cause validation errors later
  object[:datatype] = candidate
end
describes_file?(url) click to toggle source

Does this metadata describe the file (URL)? @param [RDF::URL] url @return [Boolean]

# File lib/rdf/tabular/metadata.rb, line 990
# Does this metadata describe the file (URL)?
#
# A TableGroup describes the file when any of its tables has that URL;
# any other metadata is compared through its own URL.
# @param [RDF::URL] url
# @return [Boolean]
def describes_file?(url)
  return tables.any? {|table| table.url == url} if is_a?(TableGroup)
  self.url == url
end
dialect() click to toggle source

Treat `dialect` similar to an inherited property, but merge together values from Table and TableGroup @return [Dialect]

# File lib/rdf/tabular/metadata.rb, line 495
# Dialect in effect for this metadata (memoized in @dialect).
#
# Resolution order: the dialect stored on this object, the parent's dialect,
# or, for a Table or TableGroup, a newly created empty Dialect. The new
# Dialect is also assigned through `dialect=` when there is no parent.
# @raise [Error] for other classes that have neither a dialect nor a parent
# @return [Dialect]
def dialect
  return @dialect if @dialect

  @dialect =
    if object[:dialect]
      object[:dialect]
    elsif parent
      parent.dialect
    elsif is_a?(Table) || is_a?(TableGroup)
      default = Dialect.new({}, **@options.merge(parent: self, context: nil))
      self.dialect = default unless self.parent
      default
    else
      raise Error, "Can't access dialect from #{self.class} without a parent"
    end
end
dialect=(value) click to toggle source

Set new dialect @return [Dialect]

# File lib/rdf/tabular/metadata.rb, line 510
# Set new dialect.
#
# Removes the `:dialect` entry from every child Metadata first (direct values
# and members of Array values), then stores the new dialect both in @dialect
# and in the object.
# @param [String, Hash, Dialect] value a URL to load, an object description, or a Dialect
def dialect=(value)
  # Clear cached dialect information from children
  object.values.each do |child|
    if child.is_a?(Metadata)
      child.object.delete(:dialect)
    elsif child.is_a?(Array)
      child.each {|member| member.object.delete(:dialect) if member.is_a?(Metadata)}
    end
  end

  # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
  resolved = case value
  when String
    # A URL reference: load it, identifying it by its location if it has no @id
    link = context.base.join(value).to_s
    loaded = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
    loaded[:@id] ||= link
    loaded
  when Hash
    Dialect.new(value, **@options.merge(parent: self, context: nil))
  when Dialect
    value
  else
    log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
    nil
  end

  @dialect = object[:dialect] = resolved
end
each(&block) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1094
# Proxy to @object: iterate over the key/value pairs of the Hash representation.
def each(&block)
  object.each(&block)
end
each_row(input) { |row| ... } click to toggle source

Yield each data row from the input file

@param [:read] input @yield [Row]

# File lib/rdf/tabular/metadata.rb, line 854
# Yield each data row from the input file.
#
# HTML input (a base URI path ending in `.html`, or a `text/html` content
# type) is read from the table identified by the fragment of this metadata's
# URL; rows are built from `<td>` cells. Any other input is parsed as CSV
# using `csv_options`, after dropping `skipRows` plus `headerRowCount` rows.
#
# Rows starting with the dialect's `commentPrefix` are collected under
# `rdfs:comment` instead of being yielded, and blank rows are dropped when
# `skipBlankRows` is set. Yielded rows are also recorded in `object[:rows]`
# when the `:validate` option is set.
#
# @param [:read] input
# @yield [Row]
# @raise [Error] if HTML input has no table matching the fragment identifier
def each_row(input)
  csv, number, skipped = nil, 0, 0
  path = input.base_uri.path rescue ""
  if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
    # Input is HTML; use fragment identifier to find table.
    fragment = RDF::URI(self.url).fragment rescue nil
    tab = begin
      # Extract with nokogiri.
      # `defined?` must be given the constant itself: applied to a Symbol
      # literal it is always truthy and the require would never run.
      require 'nokogiri' unless defined?(::Nokogiri)
      doc = Nokogiri::HTML.parse(input)
      doc.search("##{fragment}").first if fragment
    rescue LoadError
      # Extract with REXML
      # FIXME
    end

    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab

    # Use rows with <td> to create column data
    csv = []
    number = 0
    tab.xpath('.//tr').each do |row|
      # NOTE(review): `row.xpath('th')` looks like it is always truthy, so this
      # counts every <tr> rather than only header rows; left as is — confirm intent
      number += 1 if row.xpath('th')
      data = row.xpath('td').map(&:content)
      csv << data unless data.empty?
    end
  else
    csv = ::CSV.new(input, **csv_options)
    # Skip skipRows and headerRowCount
    skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
    (1..skipped).each {csv.shift}
  end
  csv.each do |data|
    # Check for embedded comments
    if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
      # Drop the whole prefix, however many characters it has
      v = data.join(' ')[dialect.commentPrefix.length..-1].strip
      unless v.empty?
        (self["rdfs:comment"] ||= []) << v
      end
      skipped += 1
      next
    elsif dialect.skipBlankRows && data.join("").strip.empty?
      skipped += 1
      next
    end
    number += 1
    # Row number counts yielded rows only; source number also counts skipped ones
    row = Row.new(data, self, number, number + skipped, **@options)
    (self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
    yield(row)
  end
end
has_annotations?() click to toggle source

Does the Metadata have any common properties? @return [Boolean]

# File lib/rdf/tabular/metadata.rb, line 983
# Does the Metadata have any common properties?
# A key counts as a common property when its name contains a colon.
# @return [Boolean]
def has_annotations?
  object.each_key.any? {|name| name.to_s.include?(':')}
end
inspect() click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1087
# Class name followed by the inspected annotated-table description
# (`to_atd`) when available, otherwise by the inspected Hash representation.
# @return [String]
def inspect
  described = respond_to?(:to_atd) ? to_atd : object
  self.class.name + described.inspect
end
normalize!() click to toggle source

Normalize object @raise [Error] @return [self]

# File lib/rdf/tabular/metadata.rb, line 1104
# Normalize object in place.
#
# Every entry is rewritten according to its kind:
# * keys containing a colon, and `notes`: normalized as JSON-LD,
# * `@context`: replaced by the CSVW context URL,
# * array properties: wrapped in an Array, members normalized; the
#   `resource`/`schemaReference` of foreign key references are resolved
#   against the context base,
# * link properties: resolved against the context base,
# * object properties: normalized when they are Metadata,
# * natural language properties: turned into a language map,
# * atomic range bounds: converted to typed literals.
# Anything else is kept as it is.
#
# @raise [RuntimeError] if an object property still holds a String
# @return [self]
def normalize!
  range_bounds = [:minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive]

  each do |key, value|
    kind = @properties[key] || INHERITED_PROPERTIES[key]

    self[key] =
      if key.to_s.include?(':') || key == :notes
        # Common properties and notes take precedence over the declared kind
        normalize_jsonld(key, value)
      elsif key.to_s == '@context'
        "http://www.w3.org/ns/csvw"
      else
        case kind
        when :array
          members = value.is_a?(Array) ? value : [value]
          members.map do |member|
            if member.is_a?(Metadata)
              member.normalize!
            else
              ref = member["reference"] if member.is_a?(Hash)
              if ref.is_a?(Hash)
                # SPEC SUGGESTION: special case for foreignKeys
                ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"]
                ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
              end
              member
            end
          end
        when :link
          context.base.join(value).to_s
        when :object
          # Load referenced JSON document
          # (This is done when objects are loaded in this implementation)
          raise "unexpected String value of property '#{key}': #{value}" if value.is_a?(String)
          value.is_a?(Metadata) ? value.normalize! : value
        when :natural_language
          if value.is_a?(Hash)
            value
          else
            {(context.default_language || 'und') => Array(value)}
          end
        when :atomic
          if range_bounds.include?(key)
            # Convert to a typed literal based on `base`. This will be validated later
            RDF::Literal(value, datatype: DATATYPES[self.base.to_sym])
          else
            value
          end
        else
          value
        end
      end
  end

  self
end
normalize_jsonld(property, value) click to toggle source

Normalize JSON-LD

Also, raise error if invalid JSON-LD dialect is detected

@param [Symbol, String] property @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]

# File lib/rdf/tabular/metadata.rb, line 1161
# Normalize JSON-LD
#
# Strings become value objects (tagged with the context's default language
# when there is one), Arrays are normalized member by member, value objects
# are checked and returned unchanged, and node objects are rebuilt with an
# expanded `@id` and recursively normalized properties. Problems are reported
# through `log_error`; only the first problem of a value object is reported.
#
# @param [Symbol, String] property
# @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
# @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]
def normalize_jsonld(property, value)
  case value
  when Array
    value.map {|member| normalize_jsonld(property, member)}
  when String
    expanded = {'@value' => value}
    expanded['@language'] = context.default_language if context.default_language
    expanded
  when Hash
    if value['@value']
      # Value object: validate, then hand back untouched
      names = value.keys.sort
      if !(names - %w(@value @type @language)).empty?
        log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
      elsif names.include?('@language') && names.include?('@type')
        log_error "Value object may not contain both @type and @language: #{value.to_json}"
      elsif value['@language'] && !BCP47.valid?(value['@language'].to_s)
        log_error "Value object with @language must use valid language: #{value.to_json}"
      elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
        log_error "Value object with @type must defined type: #{value.to_json}"
      end
      value
    else
      # Node object: rebuild it, dropping other keywords and blank node keys
      value.each_with_object({}) do |(name, content), node|
        case name
        when "@id"
          node[name] = context.expand_iri(content, documentRelative: true).to_s
          log_error "Invalid use of explicit BNode on @id" if node[name].start_with?('_:')
        when "@type"
          Array(content).each do |type_name|
            # Validate that all type values transform to absolute IRIs
            resource = context.expand_iri(type_name, vocab: true)
            log_error "Invalid type #{type_name} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
          end
          node[name] = content
        when /^(@|_:)/
          log_error "Invalid use of #{name} in JSON-LD content"
        else
          node[name] = normalize_jsonld(name, content)
        end
      end
    end
  else
    value
  end
end
tableSchema=(value) click to toggle source

An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document. When loading a remote schema, assign @id from its location if not already set.

# File lib/rdf/tabular/metadata.rb, line 472
# Set the table schema.
#
# A String is treated as a URL: it is resolved against the context base and
# loaded (normalized), and its location becomes its `@id` when it has none.
# A Hash is turned into a Schema and a Schema is stored as given. Any other
# value logs a warning and stores an empty Schema.
# @param [String, Hash, Schema] value
def tableSchema=(value)
  schema = case value
  when String
    link = context.base.join(value).to_s
    loaded = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
    loaded[:@id] ||= link
    loaded
  when Hash
    Schema.new(value, **@options.merge(parent: self, context: nil))
  when Schema
    value
  else
    log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
    Schema.new({}, **@options.merge(parent: self, context: nil))
  end

  object[:tableSchema] = schema
end
tables=(value) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 466
# Set the `tables` property; delegates to `set_array_value` with Table as
# the member class.
def tables=(value)
  member_class = Table
  set_array_value(:tables, value, member_class)
end
to_json(args=nil) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1098
# Serialize the Hash representation as JSON; `args` is handed through to
# the object's own `to_json`.
def to_json(args=nil)
  object.to_json(args)
end
transformations=(value) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 489
# Set the `transformations` property; delegates to `set_array_value` with
# Metadata as the member class.
def transformations=(value)
  member_class = Metadata
  set_array_value(:transformations, value, member_class)
end
type() click to toggle source

Type of this Metadata @return [:TableGroup, :Table, :Template, :Schema, :Column]

# File lib/rdf/tabular/metadata.rb, line 555
# Type of this Metadata: the last segment of the class name, as a Symbol.
# @return [:TableGroup, :Table, :Template, :Schema, :Column]
def type
  class_name = self.class.name
  class_name.split('::').last.to_sym
end
valid?() click to toggle source

Do we have valid metadata?

# File lib/rdf/tabular/metadata.rb, line 563
# Do we have valid metadata?
# Runs `validate` on every call, then reports whether any error was logged.
# @return [Boolean]
def valid?
  validate # Possibly re-validate
  log_statistics[:error] ? false : true
end
valid_natural_language_property?(value) click to toggle source

Determine if a natural language property is valid @param [String, Array<String>, Hash{String => String}] value @yield message error message @return [String, nil]

# File lib/rdf/tabular/metadata.rb, line 835
# Determine if a natural language property is valid.
#
# Accepted forms are a String, an Array of Strings, or a Hash whose keys are
# "und" or valid BCP47 tags and whose values are themselves accepted.
# @param [String, Array<String>, Hash{String => String}] value
# @return [String, nil] nil when valid, otherwise a description of what was expected
def valid_natural_language_property?(value)
  expectation = "a valid natural language property"

  case value
  when String
    nil
  when Array
    value.all? {|member| member.is_a?(String)} ? nil : expectation
  when Hash
    bad_tag = value.keys.any? {|tag| tag.to_s != "und" && !BCP47.valid?(tag)}
    invalid = bad_tag || value.values.any? {|nested| valid_natural_language_property?(nested).is_a?(String)}
    invalid ? expectation : nil
  else
    expectation
  end
end
validate() click to toggle source

Validate metadata and content, raising an error containing all errors detected during validation @raise [Error] Raise error if metadata has any unexpected properties @return [self]

# File lib/rdf/tabular/metadata.rb, line 576
# Validate this metadata object and, recursively, the nested metadata it holds.
#
# Problems are reported through `log_warn` / `log_error`; `valid?` then
# inspects the accumulated log statistics. Several invalid properties are
# removed from `object` (or reduced) so that later processing acts as if they
# had not been set. Statistics of nested columns, datatypes, dialects, tables,
# schemas and transformations are merged into this object's statistics.
#
# @return [self]
def validate
  expected_props, required_props = @properties.keys, @required

  unless is_a?(Dialect) || is_a?(Transformation)
    expected_props = expected_props + INHERITED_PROPERTIES.keys
  end

  # It has only expected properties (exclude metadata)
  check_keys = object.keys - [:"@id", :"@context"]
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
  log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}

  # It has required properties
  log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}"  unless (required_props & check_keys) == required_props

  self.normalize!

  # Every property is valid.
  # Iterate over a snapshot of the keys, since some branches delete entries from `object`.
  object.keys.each do |key|
    value = object[key]
    case key
    when :base
      log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
    when :columns
      value.each do |col|
        col.validate
        log_statistics.merge!(col.log_statistics)
      end
      column_names = value.map(&:name)
      log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
    when :datatype, :dialect, :tables, :tableSchema, :transformations
      Array(value).each do |t|
        # Make sure value is of appropriate class
        if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
          t.validate
          log_statistics.merge!(t.log_statistics)
        else
          # Report the class of the offending element, not of the enclosing collection
          log_error "#{type} has invalid property '#{key}': unexpected value #{t.class.name}"
        end
      end
      log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
    when :foreignKeys
      # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
      value.each do |fk|
        columnReference, reference = fk['columnReference'], fk['reference']
        log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
        log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2

        # Verify that columns exist in this schema
        log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
        Array(columnReference).each do |k|
          log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
        end

        if reference.is_a?(Hash)
          log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
          ref_cols = reference['columnReference']
          schema = if reference.has_key?('resource')
            if reference.has_key?('schemaReference')
              log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}" 
            end
            # resource is the URL of a Table in the TableGroup
            ref = context.base.join(reference['resource']).to_s
            table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
            log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
            table.tableSchema if table
          elsif reference.has_key?('schemaReference')
            # resource is the @id of a Schema in the TableGroup
            ref = context.base.join(reference['schemaReference']).to_s
            tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
            case tables.length
            when 0
              log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
              nil
            when 1
              tables.first.tableSchema
            else
              log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
              nil
            end
          end

          if schema
            # ref_cols must exist in schema
            log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
            Array(ref_cols).each do |k|
              log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
            end
          end
        else
          log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
        end
      end
    when :format
      case value
      when Hash
        # Object form only appropriate for numeric type
        unless %w(
          decimal integer long int short byte double float number
          nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
          unsignedLong unsignedInt unsignedShort unsignedByte
        ).include?(self.base)
          log_warn "#{type} has invalid property '#{key}': Object form only allowed on numeric datatypes"
          object.delete(:format) # act as if not set
        end

        # Otherwise, if it exists, its a UAX35 number pattern
        begin
          parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
        rescue ArgumentError => e
          log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
          object[:format].delete("pattern") if object[:format] # act as if not set
        end
      else
        case self.base
        when 'boolean'
          unless value.split("|").length == 2
            log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
            object.delete(:format) # act as if not set
          end
        when 'decimal', 'integer', 'long', 'int', 'short', 'byte',
             'nonNegativeInteger', 'positiveInteger',
             'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
             'nonPositiveInteger', 'negativeInteger',
             'double', 'float', 'number'
          # The base datatype is compared as a String everywhere else in this method,
          # so the numeric names must be Strings for this branch to be reachable.
          begin
            parse_uax35_number(value, nil)
          rescue ArgumentError => e
            log_warn "#{type} has invalid property '#{key}': #{e.message}"
            object.delete(:format) # act as if not set
          end
        when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
          # Parse and validate format
          begin
            parse_uax35_date(value, nil)
          rescue ArgumentError => e
            log_warn "#{type} has invalid property '#{key}': #{e.message}"
            object.delete(:format) # act as if not set
          end
        else
          # Otherwise, if it exists, its a regular expression
          begin
            Regexp.compile(value)
          rescue
            log_warn "#{type} has invalid property '#{key}': #{$!.message}"
            object.delete(:format) # act as if not set
          end
        end
      end
    when :length, :minLength, :maxLength
      # Applications must raise an error if both length and minLength are specified and length is less than minLength.
      # Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
      if object[:length]
        case key
        when :minLength
          log_error "#{type} has invalid property 'minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
        when :maxLength
          log_error "#{type} has invalid property 'maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
        end
      end

      # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
      if key == :maxLength && object[:minLength]
        log_error "#{type} has invalid property '#{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
      end

      # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
      unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
        log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
      end
    when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
      case self.base
      when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
           'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
           'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
           'duration', 'dayTimeDuration', 'yearMonthDuration'
        log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?

        case key
        when :minInclusive
          # Applications MUST raise an error if both minInclusive and minExclusive are specified
          log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive

          # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
          log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value

          # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
          log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
        when :maxInclusive
          # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
          log_error "#{type} cannot specify both maxInclusive and maxExclusive" if self.maxExclusive
        when :minExclusive
          # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
          log_error "#{type} maxExclusive < minExclusive" if self.maxExclusive && self.maxExclusive < value

          # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
          log_error "#{type} maxInclusive <= minExclusive" if self.maxInclusive && self.maxInclusive <= value
        end
      else
        log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
      end
    when :notes
      unless value.is_a?(Hash) || value.is_a?(Array)
        log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
      end
      begin
        normalize_jsonld(key, value)
      rescue Error => e
        log_error "#{type} has invalid content '#{key}': #{e.message}"
      end
    when :primaryKey, :rowTitles
      # A column reference property that holds either a single reference to a column description object or an array of references.
      # Reported as a warning, matching the handling of unresolved references below.
      log_warn "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
      Array(value).each do |k|
        unless self.columns.any? {|c| c[:name] == k}
          log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
          object.delete(key)
        end
      end
    when :@context
      # Skip these
    when :@id
      # Must not be a BNode
      if value.to_s.start_with?("_:")
        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
      end

      # Datatype @id MUST NOT be the URL of a built-in type
      if self.is_a?(Datatype) && DATATYPES.values.include?(value)
        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
      end
    when :@type
      # Must not be a BNode
      if value.to_s.start_with?("_:")
        log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
      end
      case type
      when :Transformation
        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
      else
        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
      end
    when ->(k) {k.to_s.include?(':')}
      # Any remaining key containing ':' is treated as JSON-LD content
      begin
        normalize_jsonld(key, value)
      rescue Error => e
        log_error "#{type} has invalid content '#{key}': #{e.message}"
      end
    end
  end

  self
end
validate!() click to toggle source
# File lib/rdf/tabular/metadata.rb, line 568
# Validate, raising when the metadata is not valid.
#
# @raise [Error] with the message "Metadata error" when `valid?` is false
# @return [nil]
def validate!
  return if valid?
  raise Error, "Metadata error"
end
verify_compatible!(other) click to toggle source

Verify that the metadata we’re using is compatible with embedded metadata @param [Table] other @raise [Error] if not compatible

# File lib/rdf/tabular/metadata.rb, line 1002
# Verify that the metadata we're using is compatible with embedded metadata.
#
# For a TableGroup, at least one of its tables must share `other`'s url and be
# compatible with it. For anything else, urls are compared and then column
# descriptions are compared position by position against the non-virtual
# columns of this table's schema.
#
# When `@options[:validate]` is set, incompatibilities raise; otherwise most
# are logged as warnings and processing continues. A column name mismatch and
# a virtual column preceding a non-virtual one always raise.
#
# @param [Table] other
# @raise [Error] if not compatible
# @return [true]
def verify_compatible!(other)
  if self.is_a?(TableGroup)
    unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
      if @options[:validate]
        raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
      else
        log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
      end
    end
  else
    # Tables must have the same url
    unless url == other.url
      if @options[:validate]
        raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}"
      else
        log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}"
      end
    end

    # Each column description within B MUST match the corresponding column description in A for non-virtual columns
    non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
    object_columns = Array(other.tableSchema.columns)

    # Special case, if there is no header, then there are no column definitions, allow this as being compatible
    if non_virtual_columns.length != object_columns.length && !object_columns.empty?
      if @options[:validate]
        raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
      else
        log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"

        # If present, a virtual column MUST appear after all other non-virtual column definitions.
        # `first(n)` is used rather than `[0..n-1]` so that n == 0 yields an empty
        # prefix instead of the whole array.
        leading_columns = Array(tableSchema.columns).first(non_virtual_columns.length)
        raise Error, "Virtual columns may not appear before non-virtual columns" unless leading_columns == non_virtual_columns
        virtual_columns = Array(tableSchema.columns).select(&:virtual)
        while non_virtual_columns.length < object_columns.length
          non_virtual_columns << nil
        end

        # Create necessary column entries
        tableSchema.columns = non_virtual_columns + virtual_columns
      end
    end

    object_columns.each_with_index do |cb, index|
      # Missing (or padded) positions are compared against an empty column description
      ca = non_virtual_columns[index] || Column.new({}, **@options)
      ta = ca.titles || {}
      tb = cb.titles || {}
      if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
        # Neither side has a name or titles: nothing to compare
      elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
        raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
      elsif @options[:validate] || !ta.empty? && !tb.empty?
        # If validating, column compatibility requires strict match between titles
        titles_match =
          # An undetermined-language title on either side matches any title on the other
          Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)} ||
          Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)} ||
          # Match on title and language
          ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?} ||
          # Match if a language from ta is a prefix of a language from tb (or vice versa) with matching titles
          ta.any? do |la, values|
            tb.keys.any? do |lb|
              (la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
            end
          end

        unless titles_match
          mismatch = "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
          raise Error, mismatch if @options[:validate]
          # If not validating, columns don't match, but processing continues
          log_warn mismatch
        end
      end
    end
  end
  true
end

Protected Instance Methods

default_value(prop) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1266
# Look up the default for a property.
#
# The receiver class's `DEFAULTS` are merged with `INHERITED_DEFAULTS`; on a
# key present in both, the `INHERITED_DEFAULTS` entry wins.
#
# @param prop property key
# @return the default, or nil when neither table has the key
def default_value(prop)
  class_defaults = self.class.const_get(:DEFAULTS)
  combined = class_defaults.merge(INHERITED_DEFAULTS)
  combined[prop]
end
root() click to toggle source

Get the root metadata object @return [TableGroup, Table]

# File lib/rdf/tabular/metadata.rb, line 1273
# Get the root metadata object by following `parent` links upward.
#
# @return the topmost ancestor, or self when there is no parent
def root
  ancestor = self.parent
  return self unless ancestor
  ancestor.root
end
set_array_value(key, value, klass, **options) click to toggle source

General setter for array properties

# File lib/rdf/tabular/metadata.rb, line 1244
# General setter for array properties.
#
# Hash members of `value` are turned into `klass` instances (constructed with
# this object's options merged with `options`, `parent: self` and
# `context: nil`); other members are kept as given. Members that are not
# instances of `klass` are then dropped with a warning. A non-Array `value`
# is warned about and stored as an empty array.
#
# @param key property key to store under in `object`
# @param value candidate array
# @param klass expected element class
# @param options extra options for constructing elements
# @return [Array, nil] the filtered array when members had to be dropped, otherwise nil
def set_array_value(key, value, klass, **options)
  unless value.is_a?(Array)
    log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
    object[key] = []
    return nil
  end

  members = value.map do |entry|
    if entry.is_a?(Hash)
      klass.new(entry, **@options.merge(options).merge(parent: self, context: nil))
    else
      entry
    end
  end
  object[key] = members

  return nil if members.all? {|entry| entry.is_a?(klass)}

  log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
  # Remove elements that aren't of the right types
  object[key] = members.select {|entry| entry.is_a?(klass)}
end
set_nl(value) click to toggle source

When setting a natural language property, always put in language-map form @param [Hash{String => String, Array<String>}, Array<String>, String] value @return [Hash{String => Array<String>}]

# File lib/rdf/tabular/metadata.rb, line 1230
# Normalize a natural language property value.
#
# * String: returned as is.
# * Array: a new array holding only its String members.
# * Hash: modified in place. Entries whose key fails `BCP47.valid?` are
#   removed and every remaining value becomes an array of its String members;
#   the same Hash is returned.
# * Anything else: nil.
#
# @param [Hash{String => String, Array<String>}, Array<String>, String] value
# @return [String, Array<String>, Hash{String => Array<String>}, nil]
def set_nl(value)
  if value.is_a?(String)
    value
  elsif value.is_a?(Array)
    value.select {|member| member.is_a?(String)}
  elsif value.is_a?(Hash)
    value.keep_if {|lang, _| BCP47.valid?(lang)}
    value.keys.each do |lang|
      value[lang] = Array(value[lang]).select {|member| member.is_a?(String)}
    end
    value
  end
end
set_property(key, type, value, invalid) click to toggle source
# File lib/rdf/tabular/metadata.rb, line 1209
# Store a property value, substituting a fallback when the value is invalid.
#
# When `invalid` is truthy a warning is logged and the stored value depends on
# the property kind: an empty string for `:link` / `:uri_template`, an empty
# Hash for `:object`, the `set_nl` normalization (or an empty array) for
# `:natural_language`; any other kind removes the key from `object`.
#
# @param key property key in `object`
# @param [Symbol] type kind of property being set (not the metadata type)
# @param value candidate value
# @param invalid description of what was expected when `value` is invalid, falsy when valid
def set_property(key, type, value, invalid)
  unless invalid
    object[key] = value
    return
  end

  # The `type` parameter shadows the `type` method; call the method explicitly so
  # the warning names this metadata object, as the other log messages do.
  log_warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
  case type
  when :link, :uri_template
    object[key] = ""
  when :object
    object[key] = {}
  when :natural_language
    object[key] = set_nl(value) || []
  else
    object.delete(key)
  end
end

Private Instance Methods

csv_options() click to toggle source

Options passed to CSV.new based on dialect @todo lineTerminators is ignored, as CSV parser uses single string or `:auto`

# File lib/rdf/tabular/metadata.rb, line 1279
# Options passed to CSV.new based on dialect.
#
# The dialect consulted is the receiver itself when it is a Dialect, otherwise
# the value of `dialect`.
#
# @return [Hash{Symbol => Object}] `:col_sep`, `:quote_char` and `:encoding`
def csv_options
  source = is_a?(Dialect) ? self : dialect
  {
    col_sep: source.delimiter,
    #row_sep: Array(source.lineTerminators).first,
    quote_char: source.quoteChar,
    encoding: source.encoding
  }
end