class RDF::Tabular::Metadata
Constants
- DATATYPES
Valid datatypes
- INHERITED_DEFAULTS
- INHERITED_PROPERTIES
Inherited properties, valid for all types
- LOCAL_CONTEXT
Local version of the context @return [JSON::LD::Context]
- NAME_SYNTAX
A name is restricted according to the following RegExp. @return [RegExp]
Attributes
Filename(s) (URI) of opened metadata, if any May be plural when merged @return [Array<RDF::URI>] filenames
ID of this Metadata
@return [RDF::URI]
Hash representation @return [Hash<Symbol,Object>]
Parent of this Metadata
(TableGroup
for Table
, …) @return [Metadata]
URL of related resource @return [RDF::URI]
Public Class Methods
Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file @param [IO, StringIO] input @param [Hash{Symbol => Object}] options @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata
is loaded from that location @option options [RDF::URI] :base
The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
@return [Metadata]
# File lib/rdf/tabular/metadata.rb, line 178 def self.for_input(input, **options) base = options[:base] # Use user metadata, if provided metadata = case options[:metadata] when Metadata then options[:metadata] when Hash Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}")) when String, RDF::URI Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}")) end # Search for metadata until found # load link metadata, if available if !metadata && input.respond_to?(:links) && link = input.links.find_link(%w(rel describedby)) link_loc = RDF::URI(base).join(link.href).to_s md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}")) if md # Metadata must describe file to be useful if md.describes_file?(base) metadata = md else log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options) end end end locs = [] # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations if !metadata && base templates = site_wide_config(base) log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"} locs = templates.map do |template| t = Addressable::Template.new(template) mapped = t.expand(url: base).to_s mapped = RDF::URI.decode(mapped) if options[:decode_uri] RDF::URI(base).join(mapped) end log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"} locs.each do |loc| metadata ||= begin md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}")) # Metadata must describe file to be useful if md # Metadata must describe file to be useful if md.describes_file?(base) md else log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options) nil end end rescue IOError log_debug("for_input", **options) {"failed to load found 
metadata #{loc}: #{$!}"} nil end end end # Return either the merge or user- and found-metadata, any of these, or an empty TableGroup metadata = case when metadata then metadata when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options) else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options) end # Make TableGroup, if not already metadata.is_a?(TableGroup) ? metadata : metadata.to_table_group end
@private
# File lib/rdf/tabular/metadata.rb, line 253 def self.new(input, **options) # Triveal case return input if input.is_a?(Metadata) object = case input when Hash then input when IO, StringIO then ::JSON.parse(input.read) else ::JSON.parse(input.to_s) end raise ::JSON::ParserError unless object.is_a?(Hash) unless options[:parent] # Add context, if not set (which it should be) object['@context'] ||= options.delete(:@context) || options[:context] end klass = case when !self.equal?(RDF::Tabular::Metadata) self # subclasses can be directly constructed without type dispatch else type = options[:type].to_sym if options[:type] # Figure out type by @type type ||= object['@type'].to_sym if object['@type'] # Otherwise, Figure out type by site object_keys = object.keys.map(&:to_s) type ||= case when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Template when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect end case type.to_s.to_sym when :TableGroup, :"" then RDF::Tabular::TableGroup when :Table then RDF::Tabular::Table when :Template then RDF::Tabular::Transformation when :Schema then RDF::Tabular::Schema when :Column then RDF::Tabular::Column when :Dialect then RDF::Tabular::Dialect else raise Error, "Unknown metadata type: #{type.inspect}" end end md = klass.allocate md.send(:initialize, object, **options) md rescue ::JSON::ParserError raise Error, "Expected input to be a JSON Object" end
Create Metadata
from IO, Hash or String
@param [Metadata, Hash, read] input @param [Hash{Symbol => Object}] options @option options [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect] :type
Type of schema, if not set, intuited from properties
@option options [JSON::LD::Context] context
Context used for this metadata. Taken from input if not provided
@option options [RDF::URI] :base
The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
@option options [Boolean] :decode_uri
Decode %-encodings in the result of a URI Template operation.
@option options [Boolean] :normalize normalize the object @option options [Boolean] :validate Strict metadata validation @raise [Error] @return [Metadata]
# File lib/rdf/tabular/metadata.rb, line 327 def initialize(input, **options) @options = options.dup # Parent of this Metadata, if any @parent = @options[:parent] # Get context from input # Optimize by using built-in version of context, and just extract @base, @lang opt_base = @options[:base] opt_base ||= input.base_uri if input.respond_to?(:base_uri) opt_base ||= input.filename if input.respond_to?(:filename) @context = case input['@context'] when Array log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw') c = LOCAL_CONTEXT.dup c.base = RDF::URI(opt_base) obj = input['@context'].detect {|e| e.is_a?(Hash)} || {} raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty? c.parse(obj) when Hash log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" c = LOCAL_CONTEXT.dup c.base = RDF::URI(opt_base) c.parse(input['@context']) when "http://www.w3.org/ns/csvw" LOCAL_CONTEXT.dup c = LOCAL_CONTEXT.dup c.base = RDF::URI(opt_base) c else if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" LOCAL_CONTEXT.dup c = LOCAL_CONTEXT.dup c.base = RDF::URI(opt_base) c end end reason = @options.delete(:reason) @options[:base] = @context ? 
@context.base : RDF::URI(opt_base) if @context && @context.default_language && !BCP47.valid?(@context.default_language.to_s) log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag" @context.default_language = nil end @filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames] @properties = self.class.const_get(:PROPERTIES) @required = self.class.const_get(:REQUIRED) @object = {} log_depth do # Input was parsed in .new # Metadata is object with symbolic keys input.each do |key, value| key = key.to_sym case key when :url # URL of CSV relative to metadata object[:url] = value @url = @options[:base].join(value) @options[:base] = @url if @context # Use as base for expanding IRIs when :@id # metadata identifier object[:@id] = if value.is_a?(String) value else log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string" "" # Default value end @id = @options[:base].join(object[:@id]) else if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key) self.send("#{key}=".to_sym, value) else object[key] = value end end end end # Set type from @type, if present and not otherwise defined @type = object[:@type].to_sym if object[:@type] if options[:normalize] # If normalizing, also remove remaining @context self.normalize! @context = nil object.delete(:@context) end if reason log_debug("md#initialize") {reason} log_debug("md#initialize") {"filenames: #{filenames}"} log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect) end end
Attempt to retrieve the file at the specified path. If it is valid metadata, create a new Metadata
object from it, otherwise, an empty Metadata
object
@param [String] path @param [Hash{Symbol => Object}] options
see `RDF::Util::File.open_file` in RDF.rb and {new}
@yield [Metadata] @raise [IOError] if file not found
# File lib/rdf/tabular/metadata.rb, line 139 def self.open(path, **options) options = options.merge( headers: { 'Accept' => 'application/ld+json, application/json' } ) path = "file:#{path}" if RDF::URI(path).relative? RDF::Util::File.open_file(path, **options) do |file| self.new(file, **options.merge(base: path, filenames: path)) end rescue Errno::ENOENT => e raise IOError, e.message end
Return the well-known configuration for a file, and remember using a weak-reference cache to avoid unnecessary retrievals. @param [String] base the URL used for finding the file @return [Array<String>, false]
# File lib/rdf/tabular/metadata.rb, line 156 def self.site_wide_config(base) require 'rdf/util/cache' unless defined?(::RDF::Util::Cache) @cache ||= RDF::Util::Cache.new(-1) config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s # Only load if we haven't tried before. Use `SITE_WIDE_DEFAULT` if not found if @cache[config_loc].nil? @cache[config_loc] = RDF::Util::File.open_file(config_loc) do |rd| rd.each_line.to_a end rescue SITE_WIDE_DEFAULT.split end @cache[config_loc] end
Private Class Methods
# File lib/rdf/tabular/metadata.rb, line 1291 def self.log_debug(*args, **options, &block) DebugContext.new.log_debug(*args, **options, &block) end
# File lib/rdf/tabular/metadata.rb, line 1294 def self.log_warn(*args, **options) DebugContext.new.log_warn(*args, **options) end
Public Instance Methods
# File lib/rdf/tabular/metadata.rb, line 1095 def ==(other) object == (other.is_a?(Hash) ? other : (other.respond_to?(:object) ? other.object : other)) end
Proxy to @object
# File lib/rdf/tabular/metadata.rb, line 1092 def [](key); object[key]; end
# File lib/rdf/tabular/metadata.rb, line 1093 def []=(key, value); object[key] = value; end
Base URL of metadata @return [RDF::URI]
# File lib/rdf/tabular/metadata.rb, line 559 def base; @options[:base]; end
Return JSON-friendly or yield RDF
for common properties
@overload common_properties
(subject, property, value, &block)
Yield RDF statements @param [RDF::Resource] subject @param [String] property @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value @yield property, value @yieldparam [String] property as a PName or URL @yieldparam [RDF::Statement] statement
@overload common_properties
(subject, property, value)
Return value with expanded values and node references flattened @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>] simply extracted from metadata
# File lib/rdf/tabular/metadata.rb, line 921 def common_properties(subject, property, value, &block) if block_given? property = context.expand_iri(property.to_s, vocab: true) unless property.is_a?(RDF::URI) case value when Array value.each {|v| common_properties(subject, property, v, &block)} when Hash if value['@value'] dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type'] lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt) block.call(RDF::Statement(subject, property, lit)) else # value MUST be a node object, establish a new subject from `@id` s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new # Generate a triple block.call(RDF::Statement(subject, property, s2)) # Generate types Array(value['@type']).each do |t| block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true))) end # Generate triples for all other properties value.each do |prop, val| next if prop.to_s.start_with?('@') common_properties(s2, prop, val, &block) end end else # Value is a primitive JSON value lit = RDF::Literal(value) block.call(RDF::Statement(subject, property, RDF::Literal(value))) end else case value when Array value.map {|v| common_properties(subject, property, v)} when Hash if value['@value'] value['@value'] elsif value.keys == %w(@id) && value['@id'] value['@id'] else nv = {} value.each do |k, v| case k.to_s when '@id' then nv[k.to_s] = context.expand_iri(v['@id']).to_s when '@type' then nv[k.to_s] = v else nv[k.to_s] = common_properties(nil, k, v) end end nv end else value end end end
Context used for this metadata. Use parent’s if not defined on self. @return [JSON::LD::Context]
# File lib/rdf/tabular/metadata.rb, line 462 def context @context || (parent.context if parent) end
Set new datatype @return [Dialect] @raise [Error] if datatype is not valid
# File lib/rdf/tabular/metadata.rb, line 539 def datatype=(value) val = case value when Hash then Datatype.new(value, **@options.merge(parent: self)) else Datatype.new({base: value}, **@options.merge(parent: self)) end if val.valid? || value.is_a?(Hash) # Set it if it was specified as an object, which may cause validation errors later object[:datatype] = val else log_warn "#{type} has invalid property 'datatype': expected a built-in or an object" end end
Does this metadata describe the file (URL)? @param [RDF::URI] url @return [Boolean]
# File lib/rdf/tabular/metadata.rb, line 990 def describes_file?(url) case self when TableGroup tables.any? {|t| t.url == url} else self.url == url end end
Treat ‘dialect` similar to an inherited property, but merge together values from Table
and TableGroup
@return [Dialect]
# File lib/rdf/tabular/metadata.rb, line 495 def dialect @dialect ||= case when object[:dialect] then object[:dialect] when parent then parent.dialect when is_a?(Table) || is_a?(TableGroup) d = Dialect.new({}, **@options.merge(parent: self, context: nil)) self.dialect = d unless self.parent d else raise Error, "Can't access dialect from #{self.class} without a parent" end end
Set new dialect @return [Dialect]
# File lib/rdf/tabular/metadata.rb, line 510 def dialect=(value) # Clear cached dialect information from children object.values.each do |v| case v when Metadata then v.object.delete(:dialect) when Array then v.each {|vv| vv.object.delete(:dialect) if vv.is_a?(Metadata)} end end # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model. @dialect = object[:dialect] = case value when String link = context.base.join(value).to_s md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true)) md[:@id] ||= link md when Hash Dialect.new(value, **@options.merge(parent: self, context: nil)) when Dialect value else log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object" nil end end
# File lib/rdf/tabular/metadata.rb, line 1094 def each(&block); object.each(&block); end
Yield each data row from the input file
@param [:read] input @yield [Row]
# File lib/rdf/tabular/metadata.rb, line 854 def each_row(input) csv, number, skipped = nil, 0, 0 path = input.base_uri.path rescue "" if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html' # Input is HTML; use fragment identfier to find table. fragment = RDF::URI(self.url).fragment rescue nil tab = begin # Extract with nokogiri require 'nokogiri' unless defined?(:Nokogiri) doc = Nokogiri::HTML.parse(input) doc.search("##{fragment}").first if fragment rescue LoadError # Extract with REXML # FIXME end raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab # Use rows with <td> to create column data csv = [] number = 0 tab.xpath('.//tr').map do |row| number += 1 if row.xpath('th') data = row.xpath('td').map(&:content) csv << data unless data.empty? end else csv = ::CSV.new(input, **csv_options) # Skip skipRows and headerRowCount skipped = (dialect.skipRows.to_i + dialect.headerRowCount) (1..skipped).each {csv.shift} end csv.each do |data| # Check for embedded comments if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix) v = data.join(' ')[1..-1].strip unless v.empty? (self["rdfs:comment"] ||= []) << v end skipped += 1 next elsif dialect.skipBlankRows && data.join("").strip.empty? skipped += 1 next end number += 1 row = Row.new(data, self, number, number + skipped, **@options) (self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating yield(row) end end
Does the Metadata
have any common properties? @return [Boolean]
# File lib/rdf/tabular/metadata.rb, line 983 def has_annotations? object.keys.any? {|k| k.to_s.include?(':')} end
# File lib/rdf/tabular/metadata.rb, line 1087 def inspect self.class.name + (respond_to?(:to_atd) ? to_atd : object).inspect end
Normalize object @raise [Error] @return [self]
# File lib/rdf/tabular/metadata.rb, line 1104 def normalize! self.each do |key, value| self[key] = case @properties[key] || INHERITED_PROPERTIES[key] when ->(k) {key.to_s.include?(':') || key == :notes} normalize_jsonld(key, value) when ->(k) {key.to_s == '@context'} "http://www.w3.org/ns/csvw" when :array value = [value] unless value.is_a?(Array) value.map do |v| if v.is_a?(Metadata) v.normalize! elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash) # SPEC SUGGESTION: special case for foreignKeys ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"] ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"] v else v end end when :link context.base.join(value).to_s when :object case value when Metadata then value.normalize! when String # Load referenced JSON document # (This is done when objects are loaded in this implementation) raise "unexpected String value of property '#{key}': #{value}" else value end when :natural_language value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)} when :atomic case key when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive # Convert to a typed literal based on `base`. This will be validated later RDF::Literal(value, datatype: DATATYPES[self.base.to_sym]) else value end else value end end self end
Normalize JSON-LD
Also, raise error if invalid JSON-LD dialect is detected
@param [Symbol, String] property @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]
# File lib/rdf/tabular/metadata.rb, line 1161 def normalize_jsonld(property, value) case value when Array value.map {|v| normalize_jsonld(property, v)} when String ev = {'@value' => value} ev['@language'] = context.default_language if context.default_language ev when Hash if value['@value'] if !(value.keys.sort - %w(@value @type @language)).empty? log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}" elsif (value.keys.sort & %w(@language @type)) == %w(@language @type) log_error "Value object may not contain both @type and @language: #{value.to_json}" elsif value['@language'] && !BCP47.valid?(value['@language'].to_s) log_error "Value object with @language must use valid language: #{value.to_json}" elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?) log_error "Value object with @type must defined type: #{value.to_json}" end value else nv = {} value.each do |k, v| case k when "@id" nv[k] = context.expand_iri(v, documentRelative: true).to_s log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:') when "@type" Array(v).each do |vv| # Validate that all type values transform to absolute IRIs resource = context.expand_iri(vv, vocab: true) log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute? end nv[k] = v when /^(@|_:)/ log_error "Invalid use of #{k} in JSON-LD content" else nv[k] = normalize_jsonld(k, v) end end nv end else value end end
An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON
metadata or as a URL reference to a separate JSON
schema document when loading a remote schema, assign @id from its location if not already set
# File lib/rdf/tabular/metadata.rb, line 472 def tableSchema=(value) object[:tableSchema] = case value when String link = context.base.join(value).to_s md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true)) md[:@id] ||= link md when Hash Schema.new(value, **@options.merge(parent: self, context: nil)) when Schema value else log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object" Schema.new({}, **@options.merge(parent: self, context: nil)) end end
# File lib/rdf/tabular/metadata.rb, line 466 def tables=(value) set_array_value(:tables, value, Table) end
# File lib/rdf/tabular/metadata.rb, line 1098 def to_json(args=nil); object.to_json(args); end
# File lib/rdf/tabular/metadata.rb, line 489 def transformations=(value) set_array_value(:transformations, value, Metadata) end
Type of this Metadata
@return [:TableGroup, :Table, :Template, :Schema, :Column]
# File lib/rdf/tabular/metadata.rb, line 555
# Type of this Metadata, derived from the demodulized class name.
# @return [Symbol] e.g. :TableGroup, :Table, :Schema, :Column
def type
  self.class.name.split('::').last.to_sym
end
Do we have valid metadata?
# File lib/rdf/tabular/metadata.rb, line 563
# Do we have valid metadata?
# Re-runs validation so log_statistics is current, then reports whether
# any errors were recorded.
def valid?
  validate # possibly re-validate, refreshing log_statistics
  log_statistics[:error] ? false : true
end
Determine if a natural language property is valid @param [String, Array<String>, Hash{String => String}] value @return [String, nil] an error-message fragment when the value is invalid, otherwise nil
# File lib/rdf/tabular/metadata.rb, line 835
# Check a natural-language property value. Valid forms are a String, an
# Array of Strings, or a Hash from BCP47 language tags (or "und") to
# valid natural-language values. Returns an error-message fragment when
# invalid, nil when valid.
def valid_natural_language_property?(value)
  msg = "a valid natural language property"
  case value
  when String
    nil
  when Array
    # Every entry must itself be a String.
    msg unless value.all? { |entry| entry.is_a?(String) }
  when Hash
    # Keys must be "und" or valid BCP47 tags; values must recursively
    # validate (short-circuits on the first failing check, as before).
    invalid = value.keys.any? { |k| k.to_s != "und" && !BCP47.valid?(k) } ||
              value.values.any? { |v| valid_natural_language_property?(v).is_a?(String) }
    msg if invalid
  else
    msg
  end
end
Validate metadata and content, raising an error containing all errors detected during validation @raise [Error] Raise error if metadata has any unexpected properties @return [self]
# File lib/rdf/tabular/metadata.rb, line 576
# Validate metadata and content.
#
# Checks that only expected keys are present and required keys exist,
# normalizes the object, then validates each property value according to
# its key. Problems are reported via log_warn/log_error (accumulated in
# log_statistics) rather than raised; use #validate! to raise on error.
#
# @return [self]
def validate
  expected_props, required_props = @properties.keys, @required

  # Dialect and Transformation do not accept the inherited properties.
  unless is_a?(Dialect) || is_a?(Transformation)
    expected_props = expected_props + INHERITED_PROPERTIES.keys
  end

  # It has only expected properties (exclude metadata)
  check_keys = object.keys - [:"@id", :"@context"]
  # Namespaced (prefixed) keys are common-property extensions, not errors.
  check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
  log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}

  # It has required properties
  log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props

  self.normalize!

  # Every property is valid
  object.keys.each do |key|
    value = object[key]
    case key
    when :base
      log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
    when :columns
      # Validate each column, merging its error/warning counts into ours.
      value.each do |col|
        col.validate
        log_statistics.merge!(col.log_statistics)
      end
      column_names = value.map(&:name)
      log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
    when :datatype, :dialect, :tables, :tableSchema, :transformations
      Array(value).each do |t|
        # Make sure value is of appropriate class
        if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
          t.validate
          log_statistics.merge!(t.log_statistics)
        else
          log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
        end
      end
      log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
    when :foreignKeys
      # An array of foreign key definitions that define how the values from
      # specified columns within this table link to rows within this table
      # or other tables.
      # A foreign key definition is a JSON object with the properties:
      value.each do |fk|
        columnReference, reference = fk['columnReference'], fk['reference']
        log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
        log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2

        # Verify that columns exist in this schema
        log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
        Array(columnReference).each do |k|
          log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
        end

        if reference.is_a?(Hash)
          log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
          ref_cols = reference['columnReference']
          # Locate the referenced schema, either via a table URL (resource)
          # or a schema @id (schemaReference) — but not both.
          schema = if reference.has_key?('resource')
            if reference.has_key?('schemaReference')
              log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
            end
            # resource is the URL of a Table in the TableGroup
            ref = context.base.join(reference['resource']).to_s
            table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
            log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
            table.tableSchema if table
          elsif reference.has_key?('schemaReference')
            # resource is the @id of a Schema in the TableGroup
            ref = context.base.join(reference['schemaReference']).to_s
            tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
            case tables.length
            when 0
              log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
              nil
            when 1
              tables.first.tableSchema
            else
              log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
              nil
            end
          end

          if schema
            # ref_cols must exist in schema
            log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
            Array(ref_cols).each do |k|
              log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
            end
          end
        else
          log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
        end
      end
    when :format
      case value
      when Hash
        # Object form only appropriate for numeric type
        unless %w(
          decimal integer long int short byte double float number
          nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
          unsignedLong unsignedInt unsignedShort unsignedByte
        ).include?(self.base)
          log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
          object.delete(:format) # act as if not set
        end

        # Otherwise, if it exists, its a UAX35 number pattern
        begin
          parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
        rescue ArgumentError => e
          log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
          object[:format].delete("pattern") if object[:format] # act as if not set
        end
      else
        case self.base
        when 'boolean'
          # Format for boolean is "trueValue|falseValue".
          unless value.split("|").length == 2
            log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
            object.delete(:format) # act as if not set
          end
        when :decimal, :integer, :long, :int, :short, :byte, :nonNegativeInteger, :positiveInteger, :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte, :nonPositiveInteger, :negativeInteger, :double, :float, :number
          # NOTE(review): these are Symbols while other branches compare
          # Strings against self.base — confirm which form self.base returns.
          begin
            parse_uax35_number(value, nil)
          rescue ArgumentError => e
            log_warn "#{type} has invalid property '#{key}': #{e.message}"
            object.delete(:format) # act as if not set
          end
        when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
          # Parse and validate format
          begin
            parse_uax35_date(value, nil)
          rescue ArgumentError => e
            log_warn "#{type} has invalid property '#{key}': #{e.message}"
            object.delete(:format) # act as if not set
          end
        else
          # Otherwise, if it exists, its a regular expression
          begin
            Regexp.compile(value)
          rescue
            log_warn "#{type} has invalid property '#{key}': #{$!.message}"
            object.delete(:format) # act as if not set
          end
        end
      end
    when :length, :minLength, :maxLength
      # Applications must raise an error if both length and minLength are specified and length is less than minLength.
      # Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
      if object[:length]
        case key
        when :minLength
          log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
        when :maxLength
          log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
        end
      end

      # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
      if key == :maxLength && object[:minLength]
        log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
      end

      # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
      unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
        log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
      end
    when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
      case self.base
      when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
           'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
           'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
           'duration', 'dayTimeDuration', 'yearMonthDuration'
        log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?

        case key
        when :minInclusive
          # Applications MUST raise an error if both minInclusive and minExclusive are specified
          log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
          # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
          log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
          # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
          log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
        when :maxInclusive
          # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
          # NOTE(review): "maInclusive" in the message looks like a typo for
          # "maxInclusive" — confirm before changing the runtime string.
          log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
        when :minExclusive
          # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
          log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
          # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
          log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
        end
      else
        log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
      end
    when :notes
      unless value.is_a?(Hash) || value.is_a?(Array)
        log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
      end
      begin
        normalize_jsonld(key, value)
      rescue Error => e
        log_error "#{type} has invalid content '#{key}': #{e.message}"
      end
    when :primaryKey, :rowTitles
      # A column reference property that holds either a single reference to a column description object or an array of references.
      # NOTE(review): this message is built but never passed to log_error —
      # the empty-reference case is silently ignored; likely a missing call.
      "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
      Array(value).each do |k|
        unless self.columns.any? {|c| c[:name] == k}
          log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
          object.delete(key)
        end
      end
    when :@context
      # Skip these
    when :@id
      # Must not be a BNode
      if value.to_s.start_with?("_:")
        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
      end

      # Datatype @id MUST NOT be the URL of a built-in type
      if self.is_a?(Datatype) && DATATYPES.values.include?(value)
        log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
      end
    when :@type
      # Must not be a BNode
      if value.to_s.start_with?("_:")
        log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
      end
      case type
      when :Transformation
        # A Transformation's JSON @type is "Template".
        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
      else
        log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
      end
    when ->(k) {key.to_s.include?(':')}
      # Common (prefixed) properties: validate as JSON-LD content.
      begin
        normalize_jsonld(key, value)
      rescue Error => e
        log_error "#{type} has invalid content '#{key}': #{e.message}"
      end
    end
  end

  self
end
# File lib/rdf/tabular/metadata.rb, line 568
# Validate the metadata, raising when any validation error was logged.
# @raise [Error] if the metadata is not valid
def validate!
  return if valid?
  raise Error, "Metadata error"
end
Verify that the metadata we’re using is compatible with embedded metadata @param [Table] other @raise [Error] if not compatible
# File lib/rdf/tabular/metadata.rb, line 1002
# Verify that the metadata we're using is compatible with embedded metadata.
#
# For a TableGroup, some contained Table must match other's url and itself
# verify compatible. For a Table, urls must match and column descriptions
# must correspond (by name or by title) for non-virtual columns. When
# @options[:validate] is set, incompatibilities raise; otherwise they are
# logged as warnings and processing continues (possibly after padding the
# schema with placeholder columns).
#
# @param [Table] other
# @raise [Error] if not compatible and validating
# @return [true]
def verify_compatible!(other)
  if self.is_a?(TableGroup)
    unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
      if @options[:validate]
        raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
      else
        log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
      end
    end
  else
    # Tables must have the same url
    # NOTE(review): the doubled "}}" at the end of both messages looks like
    # a typo in the runtime string — confirm before changing.
    unless url == other.url
      if @options[:validate]
        raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
      else
        log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
      end
    end

    # Each column description within B MUST match the corresponding column description in A for non-virtual columns
    non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
    object_columns = Array(other.tableSchema.columns)

    # Special case, if there is no header, then there are no column definitions, allow this as being compatible
    if non_virtual_columns.length != object_columns.length && !object_columns.empty?
      if @options[:validate]
        raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
      else
        log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
        # If present, a virtual column MUST appear after all other non-virtual column definitions
        raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
        virtual_columns = Array(tableSchema.columns).select(&:virtual)
        # Pad with nil placeholders so each embedded column has a slot.
        while non_virtual_columns.length < object_columns.length
          non_virtual_columns << nil
        end
        # Create necessary column entries
        tableSchema.columns = non_virtual_columns + virtual_columns
      end
    end

    index = 0
    # NOTE(review): the block's last expression is `index += 1`, which is
    # always truthy, so this #all? can never be false — compatibility
    # failures surface only via the raise/log_warn inside. Confirm intended.
    object_columns.all? do |cb|
      # Pair each embedded column with ours (empty Column when we ran out).
      ca = non_virtual_columns[index] || Column.new({}, **@options)
      ta = ca.titles || {}
      tb = cb.titles || {}
      if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
        true
      elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
        raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
      elsif @options[:validate] || !ta.empty? && !tb.empty?
        # If validating, column compatibility requires strict match between titles
        titles_match = case
        when Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)}
          true
        when Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)}
          true
        when ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?}
          # Match on title and language
          true
        else
          # Match if a language from ta is a prefix of a language from tb with matching titles
          ta.any? do |la, values|
            tb.keys.any? do |lb|
              (la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
            end
          end
        end

        if titles_match
          true
        elsif !@options[:validate]
          # If not validating, columns don't match, but processing continues
          log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
          true
        else
          raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
        end
      end
      index += 1
    end
  end
  true
end
Protected Instance Methods
# File lib/rdf/tabular/metadata.rb, line 1266
# Look up the default for a property: the class's own DEFAULTS, with
# INHERITED_DEFAULTS taking precedence on key collisions.
def default_value(prop)
  defaults = self.class.const_get(:DEFAULTS).merge(INHERITED_DEFAULTS)
  defaults[prop]
end
Get the root metadata object @return [TableGroup, Table]
# File lib/rdf/tabular/metadata.rb, line 1273
# Get the root metadata object by walking up the parent chain.
# @return [TableGroup, Table]
def root
  return self unless self.parent
  self.parent.root
end
General setter for array properties
# File lib/rdf/tabular/metadata.rb, line 1244
# General setter for array properties.
#
# Stores value under key as an array: Hash elements are instantiated as
# klass (inheriting this object's options, with self as parent); a
# non-Array value is warned about and replaced by []. Any remaining
# elements that are not klass instances are warned about and removed.
#
# @param [Symbol] key      property name within object
# @param [Object] value    candidate value (expected to be an Array)
# @param [Class]  klass    expected element class
# @param [Hash]   options  extra options merged into each constructed instance
def set_array_value(key, value, klass, **options)
  object[key] = case value
  when Array
    value.map do |v|
      case v
      when Hash
        klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
      else
        v
      end
    end
  else
    log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
    []
  end

  unless object[key].all? {|v| v.is_a?(klass)}
    log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
    # Remove elements that aren't of the right type. Use the non-destructive
    # #select here: the previous code assigned the return of #select!, which
    # yields nil when no elements are removed and would have clobbered
    # object[key] if that branch were ever reached with an all-valid array.
    object[key] = object[key].select {|v| v.is_a?(klass)}
  end
end
When setting a natural language property, always put in language-map form @param [Hash{String => String, Array<String>}, Array<String>, String] value @return [Hash{String => Array<String>}]
# File lib/rdf/tabular/metadata.rb, line 1230
# Normalize a natural-language property value.
#
# Strings pass through; Arrays are filtered to their String entries; for a
# Hash, keys failing BCP47 validation are dropped and each value is coerced
# to an Array of Strings. Any other input yields nil.
#
# @param [Hash{String => String, Array<String>}, Array<String>, String] value
# @return [String, Array<String>, Hash{String => Array<String>}, nil]
def set_nl(value)
  if value.is_a?(String)
    value
  elsif value.is_a?(Array)
    value.select { |entry| entry.is_a?(String) }
  elsif value.is_a?(Hash)
    # Drop keys that are not valid language tags, then force each value
    # into an array of strings (mutates and returns the same Hash).
    value.delete_if { |lang, _| !BCP47.valid?(lang) }
    value.each_key do |lang|
      value[lang] = Array(value[lang]).select { |title| title.is_a?(String) }
    end
  end
end
# File lib/rdf/tabular/metadata.rb, line 1209
# Store a property value, or — when invalid contains an expectation
# message — warn and substitute a type-appropriate fallback: "" for links
# and URI templates, {} for objects, a normalized language map (or []) for
# natural-language values; anything else is simply removed.
def set_property(key, type, value, invalid)
  unless invalid
    return object[key] = value
  end

  log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
  case type
  when :link, :uri_template
    object[key] = ""
  when :object
    object[key] = {}
  when :natural_language
    object[key] = set_nl(value) || []
  else
    object.delete(key)
  end
end
Private Instance Methods
Options passed to CSV.new based on dialect @todo lineTerminators is ignored, as the CSV parser uses a single string or `:auto`
# File lib/rdf/tabular/metadata.rb, line 1279
# Options passed to CSV.new based on dialect.
#
# @todo lineTerminators is ignored, as the CSV parser uses a single string or :auto
# @return [Hash] keyword options for CSV.new (col_sep, quote_char, encoding)
def csv_options
  # Resolve the effective dialect once instead of repeating the ternary
  # for every option: a Dialect describes itself; any other metadata
  # delegates to its #dialect.
  d = is_a?(Dialect) ? self : dialect
  {
    col_sep: d.delimiter,
    #row_sep: Array(d.lineTerminators).first,
    quote_char: d.quoteChar,
    encoding: d.encoding
  }
end