class RDF::Tabular::Dialect

Constants

DEFAULTS

Defaults for dialects

PROPERTIES
REQUIRED

Public Instance Methods

embedded_metadata(input, metadata, **options) click to toggle source

Extract a new Metadata document from the file or data provided

@param [#read, #to_s] input IO, or file path or URL
@param [Table] metadata used for saving annotations created while extracting metadata
@param [Hash{Symbol => Object}] options any additional options (see `RDF::Util::File.open_file`)

@option options [String] :lang language to set in table, if any
@return [Metadata] Tabular metadata
@see w3c.github.io/csvw/syntax/#parsing

# File lib/rdf/tabular/metadata.rb, line 1792
# Build a Table description from the header information embedded in a
# tabular input (CSV header rows, or <th> cells of an HTML table).
#
# Leading rows skipped via `skipRows` are recorded as `rdfs:comment`
# annotations on `metadata`; header cells become column `titles`.
#
# @param [#read, #to_s] input IO, or file path or URL. A String is opened with
#   `RDF::Util::File.open_file` and processed recursively with `:base` set.
# @param [Table, Hash, nil] metadata receives `lang` and `rdfs:comment`
#   annotations; when nil, the newly built table hash is used instead.
# @param [Hash{Symbol => Object}] options
#   any additional options (see `RDF::Util::File.open_file`); `:context` is discarded.
# @option options [String] :lang language to set in table, if any (titles default to 'und')
# @option options [String] :base URL recorded as the table's `url`
# @return [Table] table built from the embedded metadata
# @raise [Error] if the input is HTML and no table is found for the URL fragment
# @see w3c.github.io/csvw/syntax/#parsing
def embedded_metadata(input, metadata, **options)
  options = options.dup
  options.delete(:context) # Don't accidentally use a passed context
  # Normalize input to an IO object
  if input.is_a?(String)
    return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
  end

  table = {
    "@context" => "http://www.w3.org/ns/csvw",
    "url" => (options.fetch(:base, "")),
    "@type" => "Table",
    "tableSchema" => {
      "@type" => "Schema",
      "columns" => []
    }
  }
  metadata ||= table  # In case the embedded metadata becomes the final metadata
  lang = metadata["lang"] = options[:lang] if options[:lang]
  lang ||= 'und'

  # Dialect settings are loop-invariant; resolve them once.
  skip_cols = skipColumns.to_i
  trim_mode = trim.to_s
  trim_start = %w(true start).include?(trim_mode)
  trim_end = %w(true end).include?(trim_mode)

  # Apply the dialect's trim setting without mutating the caller's string.
  trim_value = lambda do |value|
    value = value.lstrip if trim_start
    value = value.rstrip if trim_end
    value
  end

  # Record a header cell as a title of the column at its (skip-adjusted) index.
  add_title = lambda do |value, index|
    next if index < skip_cols || value.to_s.empty?

    columns = table["tableSchema"]["columns"] ||= []
    column = columns[index - skip_cols] ||= {
      "titles" => {lang => []},
    }
    column["titles"][lang] << trim_value.call(value)
  end

  # Inputs without a base_uri (e.g. StringIO) are treated as having no path.
  path = (input.base_uri.path rescue "").to_s
  if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
    # Input is HTML; use fragment identifier to find table.
    fragment = RDF::URI(table["url"]).fragment rescue nil
    tab = begin
      # Extract with nokogiri.
      # `defined?(::Nokogiri)` checks the constant; `defined?(:Nokogiri)` would
      # test a symbol literal, which is always defined, so require never ran.
      require 'nokogiri' unless defined?(::Nokogiri)
      doc = Nokogiri::HTML.parse(input)
      doc.search("##{fragment}").first if fragment
    rescue LoadError
      # Extract with REXML
      # FIXME: not implemented; falls through to the error below
      nil
    end

    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab

    # Use rows with <th> to create column titles
    tab.xpath('.//tr').each do |row|
      row.xpath('th').map(&:content).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  else
    csv = ::CSV.new(input, **csv_options)
    skipRows.to_i.times do
      # Skip initial lines, these form comment annotations.
      # Array() guards against input with fewer rows than skipRows (shift => nil).
      value = trim_value.call(Array(csv.shift).join(delimiter))

      # Strip the whole prefix, which may be longer than one character.
      value = value[commentPrefix.length..-1].strip if commentPrefix && value.start_with?(commentPrefix)
      (metadata["rdfs:comment"] ||= []) << value unless value.empty?
    end
    # Comments are collected on metadata["rdfs:comment"], so log those.
    log_debug("embedded_metadata") {"notes: #{metadata["rdfs:comment"].inspect}"}

    headerRowCount.to_i.times do
      Array(csv.shift).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  end
  log_debug("embedded_metadata") {"table: #{table.inspect}"}
  # Leave the input positioned at the start for the subsequent full parse.
  input.rewind if input.respond_to?(:rewind)

  Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
end
escape_character() click to toggle source

Escape character used inside quoted values: a double quote when doubleQuote is set, otherwise a backslash.
@return [String]

# File lib/rdf/tabular/metadata.rb, line 1766
# Character used to escape quotes within quoted values.
#
# @return [String] a double quote when `doubleQuote` is truthy, otherwise a backslash
def escape_character
  return '"' if doubleQuote

  '\\'
end
headerRowCount() click to toggle source

Default for headerRowCount is one if header is true, and zero if header is false.
@return [Integer]

# File lib/rdf/tabular/metadata.rb, line 1772
# Number of header rows in the input.
#
# Uses the explicitly set `:headerRowCount` when present; otherwise
# falls back to 1 when `header` is truthy and 0 when it is not.
#
# @return [Integer]
def headerRowCount
  fallback = header ? 1 : 0
  object.fetch(:headerRowCount, fallback)
end
trim() click to toggle source

Default for trim comes from skipInitialSpace: 'start' if skipInitialSpace is set, otherwise true.
@return [Boolean, String]

# File lib/rdf/tabular/metadata.rb, line 1778
# Whitespace trimming mode for cell values.
#
# Uses the explicitly set `:trim` when present; otherwise the default
# is 'start' when `skipInitialSpace` is truthy and `true` when it is not.
#
# @return [Boolean, String]
def trim
  default_mode = skipInitialSpace ? 'start' : true
  object.fetch(:trim, default_mode)
end