module PubliSci::RDFParser

Public Instance Methods

add_node(n,str="") click to toggle source
# File lib/publisci/parser.rb, line 147
def add_node(n,str="")

  raise "need index or identifier to generate blank nodes" unless n
  raise "need base string or blank string for blank node" unless str.is_a? String
  if str["node"]
    ret = str[0..-2] + "/#{n}" + ">"
    ret
    # str[0..-2] + "/#{n}" + ">"
  else
    "<node/#{n}>"
  end
end
bnode_value(obj, node_index, node_str, options) click to toggle source
# File lib/publisci/parser.rb, line 173
def bnode_value(obj, node_index, node_str, options)
  # TODO - Implement proper recursion
  # TODO - check if object is "a" (rdf:type) => or convert rdf:type to "a"
  str = ""
  subnodes = []
  if obj.is_a?(Array) # && obj.size == 2
    if obj.size == 2
      if obj[0].is_a?(String)
        if is_complex?(obj[1])
          str << "#{to_resource(obj[0])} #{add_node(node_index,node_str)} . \n"
          subnodes << encode_value(obj[1], options, node_index, node_str)
        else
          str << "#{to_resource(obj[0])} #{encode_value(obj[1], options, node_index, node_str)} "
        end
      elsif obj[0].is_a?(Array) && obj[1].is_a?(Array)
        newnode = add_node(0,node_str)
        v1 = bnode_value(obj[0], 0, node_str, options)
        v2 = bnode_value(obj[1], 1, node_str, options)

        if v1.is_a? Array
          subnodes << v1
          v1 = nil
        end

        if v2.is_a? Array
          subnodes << v2
          v2 = nil
        end

        if v1
          str << "#{v1} ;"
        end

        str << "\n#{v2} .\n" if v2
      end
    elsif obj.all?{|ent| ent.is_a? Array}
      obj.each{|ent|
        bn = bnode_value(ent,node_index,node_str,options)
        if bn.is_a? String
          str << bn + "\n"
        else
          str << bn[0] + "\n"
          subnodes << bn[1]
        end
      }
    end
  else
    raise "Invalid Structured value: #{obj}"
  end

  if subnodes.size > 0
    [str, subnodes.flatten].flatten
  else
    str
  end
end
encode_value(obj,options={}, node_index=nil, node_str = "") click to toggle source
# File lib/publisci/parser.rb, line 160
def encode_value(obj,options={}, node_index=nil, node_str = "")
  if RDF::Resource(obj).valid?
    to_resource(obj,options)
  elsif obj && obj.is_a?(String) && (obj[0]=="<" && obj[-1] = ">")
    obj
  elsif obj.is_a?(Array)
    node_str = add_node(node_index,node_str)
    ["#{node_str}" ] + [bnode_value(obj, node_index, node_str, options)]
  else
    to_literal(obj,options)
  end
end
get_ary(query_results,method='to_s') click to toggle source
# File lib/publisci/parser.rb, line 50
def get_ary(query_results,method='to_s')
  query_results.map{|solution|
    solution.to_a.map{|entry|
      if entry.last.respond_to? method
        entry.last.send(method)
      else
        entry.last.to_s
      end
    }
  }
end
get_hashes(query_results,method=nil) click to toggle source
# File lib/publisci/parser.rb, line 62
def get_hashes(query_results,method=nil)
  arr=[]
  query_results.map{|solution|
    h={}
    solution.map{|element|
      if method && element[1].respond_to?(method)
        h[element[0]] = element[1].send(method)
      else
        h[element[0]] = element[1]
      end
    }
    arr << h
  }
  arr
end
is_complex?(obj) click to toggle source
# File lib/publisci/parser.rb, line 143
def is_complex?(obj)
  obj.is_a? Array
end
is_uri?(obj) click to toggle source
# File lib/publisci/parser.rb, line 4
def is_uri?(obj)
  RDF::Resource(obj).valid?
end
load_string(string,repo=RDF::Repository.new) click to toggle source
# File lib/publisci/parser.rb, line 41
def load_string(string,repo=RDF::Repository.new)
  f = Tempfile.new('repo')
  f.write(string)
  f.close
  repo.load(f.path, :format => :ttl)
  f.unlink
  repo
end
observation_hash(query_results,shorten_uris=false,method='to_s') click to toggle source
# File lib/publisci/parser.rb, line 78
def observation_hash(query_results,shorten_uris=false,method='to_s')
  h={}
  query_results.map{|sol|
    (h[sol[:observation].to_s] ||= {})[sol[:property].to_s] = sol[:value].to_s
  }

  if shorten_uris
    newh= {}
    h.map{|k,v|
      newh[strip_uri(k)] ||= {}
      v.map{|kk,vv|
        newh[strip_uri(k)][strip_uri(kk)] = strip_uri(vv)
      }
    }
    newh
  else
    h
  end
end
sanitize(array) click to toggle source
# File lib/publisci/parser.rb, line 8
def sanitize(array)
  #remove spaces and other special characters
  array = Array(array)
  processed = []
  array.map{|entry|
    if entry.is_a? String
      if is_uri? entry
        processed << entry.gsub(/[\s]/,'_')
      else
        processed << entry.gsub(/[\s]/,'_')
      end
    else
      processed << entry
    end
  }
  processed
end
sanitize_hash(h) click to toggle source
# File lib/publisci/parser.rb, line 26
def sanitize_hash(h)
  mappings = {}
  h.keys.map{|k|
    if(k.is_a? String)
      mappings[k] = k.gsub(' ','_')
    end
  }

  h.keys.map{|k|
    h[mappings[k]] = h.delete(k) if mappings[k]
  }

  h
end
strip_prefixes(string) click to toggle source
# File lib/publisci/parser.rb, line 261
def strip_prefixes(string)
  string.to_s.split(':').last
end
strip_uri(uri) click to toggle source
# File lib/publisci/parser.rb, line 255
def strip_uri(uri)
  uri = uri.to_s.dup
  uri[-1] = '' if uri[-1] == '>'
  uri.to_s.split('/').last.split('#').last
end
to_literal(obj, options={}) click to toggle source
# File lib/publisci/parser.rb, line 124
def to_literal(obj, options={})
  if obj.is_a? String
    # Depressing that there's no more elegant way to check if a string is
    # a number...
    if val = Integer(obj) rescue nil
      val
    elsif val = Float(obj) rescue nil
      val
    else
      '"'+obj+'"'
    end
  elsif obj == nil && options[:encode_nulls]
    #TODO decide the right way to handle missing values, since RDF has no null
    'rdf:nil'
  else
    obj
  end
end
to_resource(obj, options={}) click to toggle source
# File lib/publisci/parser.rb, line 98
def to_resource(obj, options={})
  if obj.is_a? String

    if is_uri? obj
      obj = RDF::Resource(obj).to_base unless obj[/\w+:\w/]
    else

      #TODO decide the right way to handle missing values, since RDF has no null
      #probably throw an error here since a missing resource is a bigger problem
      obj = "rdf:nil" if obj.empty?
      obj=  obj.to_s.gsub(' ','_')
    end

    obj
    #TODO  remove special characters (faster) as well (eg '?')

  elsif obj == nil && options[:encode_nulls]
    'rdf:nil'
  elsif obj.is_a? Numeric
    #resources cannot be referred to purely by integer (?)
    "n"+obj.to_s
  else
    obj
  end
end
turtle_indent(turtle_str) click to toggle source
# File lib/publisci/parser.rb, line 230
def turtle_indent(turtle_str)
  tabs = 0
  turtle_str.split("\n").map{|str|
    case str[-1]
      when "."
        last_tabs = tabs
        tabs = 0
        ("  " * last_tabs) + str
      when ";"
        last_tabs = tabs
        tabs = 1 if tabs == 0
        ("  " * last_tabs) + str
      else
        last_tabs = tabs
        if str.size < 2
          tabs = 0
        else
          tabs += 1
        end
        ("  " * last_tabs) + str
    end
  }.join("\n")

end