class RDF::Tabular::Dialect
Constants
- DEFAULTS
Defaults for dialects
- PROPERTIES
- REQUIRED
Public Instance Methods
embedded_metadata(input, metadata, **options)
click to toggle source
Extract a new Metadata document from the file or data provided.
@param [#read, #to_s] input IO, or file path or URL
@param [Table] metadata used for saving annotations created while extracting metadata
@param [Hash{Symbol => Object}] options any additional options (see `RDF::Util::File.open_file`)
@option options [String] :lang language to set in table, if any
@return [Metadata] Tabular metadata
@see https://w3c.github.io/csvw/syntax/#parsing
# File lib/rdf/tabular/metadata.rb, line 1792
#
# Extract a new Metadata document from the file or data provided.
#
# Column titles are collected either from the `<th>` cells of an HTML table
# (located through the fragment identifier of the base URL) or from the
# header rows of a CSV document. For CSV input, the rows skipped before the
# header are recorded on `metadata` under "rdfs:comment".
#
# @param [#read, #to_s] input IO, or file path or URL
# @param [Table] metadata used for saving annotations created while extracting metadata
# @param [Hash{Symbol => Object}] options
#   any additional options (see `RDF::Util::File.open_file`)
# @option options [String] :lang language to set in table, if any
# @option options [String] :base used as the "url" of the generated table
# @return [Metadata] Tabular metadata
# @raise [Error] if the input is HTML and no table can be located (this also
#   happens when nokogiri cannot be loaded)
# @see https://w3c.github.io/csvw/syntax/#parsing
def embedded_metadata(input, metadata, **options)
  options = options.dup
  options.delete(:context) # Don't accidentally use a passed context

  # Normalize input to an IO object
  if input.is_a?(String)
    return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
  end

  table = {
    "@context" => "http://www.w3.org/ns/csvw",
    "url" => (options.fetch(:base, "")),
    "@type" => "Table",
    "tableSchema" => {
      "@type" => "Schema",
      "columns" => []
    }
  }
  metadata ||= table # In case the embedded metadata becomes the final metadata
  lang = metadata["lang"] = options[:lang] if options[:lang]
  lang ||= 'und'

  # Dialect settings do not change while parsing, so resolve them once.
  skip_cols = skipColumns.to_i
  trim_mode = trim.to_s
  trim_start = %w(true start).include?(trim_mode)
  trim_end = %w(true end).include?(trim_mode)
  columns = table["tableSchema"]["columns"]

  # Records one header cell as a title of the column at `index`.
  # Shared by the HTML and CSV branches below.
  add_title = lambda do |value, index|
    # Skip columns
    next if index < skip_cols || value.to_s.empty?

    # Trim value without mutating the string handed over by the parser
    title = value
    title = title.lstrip if trim_start
    title = title.rstrip if trim_end

    # Initialize titles
    column = columns[index - skip_cols] ||= {
      "titles" => {lang => []},
    }
    column["titles"][lang] << title
  end

  # The input may not expose a base URI at all (e.g. a plain IO), and the URI
  # may have no path; either way fall back to an empty path.
  path = begin
    input.base_uri.path.to_s
  rescue StandardError
    ""
  end
  is_html = path.end_with?('.html') ||
            (input.respond_to?(:content_type) && input.content_type == 'text/html')

  if is_html
    # Input is HTML; use fragment identifier to find table.
    fragment = RDF::URI(table["url"]).fragment rescue nil
    tab = begin
      # Extract with nokogiri. `defined?(::Nokogiri)` checks the constant;
      # testing the Symbol `:Nokogiri` would always be truthy and the
      # library would never be required.
      require 'nokogiri' unless defined?(::Nokogiri)
      doc = Nokogiri::HTML.parse(input)
      doc.search("##{fragment}").first if fragment
    rescue LoadError
      # Extract with REXML
      # FIXME
      nil
    end

    raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab

    # Use rows with <th> to create column titles
    tab.xpath('.//tr').each do |row|
      row.xpath('th').map(&:content).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  else
    csv = ::CSV.new(input, **csv_options)
    skipRows.to_i.times do
      # Skip initial lines, these form comment annotations.
      # `shift` returns nil once the input is exhausted.
      value = (csv.shift || []).join(delimiter)

      # Trim value
      value = value.lstrip if trim_start
      value = value.rstrip if trim_end

      # Remove the whole comment prefix, which may be longer than one character
      value = value[commentPrefix.length..-1].strip if commentPrefix && value.start_with?(commentPrefix)
      (metadata["rdfs:comment"] ||= []) << value unless value.empty?
    end
    # NOTE(review): nothing in this method assigns table["notes"], so this
    # logs nil; the skipped rows are stored in metadata["rdfs:comment"].
    log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}

    (1..headerRowCount).each do
      Array(csv.shift).each_with_index do |value, index|
        add_title.call(value, index)
      end
    end
  end
  log_debug("embedded_metadata") {"table: #{table.inspect}"}
  input.rewind if input.respond_to?(:rewind)

  Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
end
escape_character()
click to toggle source
Escape character: a double quote when doubleQuote is set, otherwise a backslash. @return [String]
# File lib/rdf/tabular/metadata.rb, line 1766
#
# Escape character used inside quoted values: a double quote when
# `doubleQuote` is set, a backslash otherwise.
#
# @return [String]
def escape_character
  return '"' if self.doubleQuote

  '\\'
end
headerRowCount()
click to toggle source
Default for headerRowCount is 1 when header is true and zero when header is false. @return [Integer]
# File lib/rdf/tabular/metadata.rb, line 1772
#
# Number of header rows. Uses the stored `:headerRowCount` value when one is
# present; otherwise 1 if `header` is set and 0 if it is not.
#
# @return [Integer]
def headerRowCount
  fallback = self.header ? 1 : 0
  object.fetch(:headerRowCount, fallback)
end
trim()
click to toggle source
Default for trim comes from skipInitialSpace: 'start' when it is set, otherwise true. @return [Boolean, String]
# File lib/rdf/tabular/metadata.rb, line 1778
#
# Trim setting. Uses the stored `:trim` value when one is present; otherwise
# 'start' if `skipInitialSpace` is set and `true` if it is not.
#
# @return [Boolean, String]
def trim
  default_trim = self.skipInitialSpace ? 'start' : true
  object.fetch(:trim, default_trim)
end