class OboParser::Tokens::TagValuePair

This token needs simplification, likely by creating additional tokens for quoted qualifiers and optional modifiers ({}), and by creating individual tokens for the tags that don't conform to the pattern used for def: tags. The code can't presently handle escaped characters (like \,), as bizarrely found in some OBO files.

Attributes

comment[R]
description[R]
qualifier[R]
tag[R]
xrefs[R]

Public Class Methods

new(str) click to toggle source
# File lib/obo_parser/tokens.rb, line 26
# Parses one "tag: value" line of an OBO stanza into its parts.
#
# str - the raw line, e.g. 'def: "text" [GO:1, PMID:2 "note"] {q} ! comment'.
#       It must contain a ':' separating the tag from the value. The string is
#       not modified, so frozen strings are accepted.
#
# Always sets @tag and @value. For every tag except 'comment' it also sets
# @xrefs (an Array, possibly empty) and, when present in the line, @comment
# (the text of a trailing "! ..." comment) and @qualifier (a trailing "{...}"
# block, or the text following a quoted value). A 'comment' tag keeps its value
# verbatim and assigns none of the other instance variables.
#
# Raises RuntimeError when the xref list cannot be consumed within 500 passes.
def initialize(str)
  tag, value = str.strip.split(':', 2)
  value.strip!

  # Comment tags are free text: nothing inside them is interpreted.
  if tag == 'comment'
    @tag = tag.strip
    @value = value.strip
    return
  end

  @xrefs = []

  # Handle inline comments
  if value =~ /(\s+!\s*.+)\s*\n*\z/i
    @comment = $1
    value.gsub!(@comment, '')
    @comment.strip!
    @comment.sub!(/\A!\s*/, '')
  end

  value.strip!

  # Qualifier for the whole tag
  if value =~ /(\{[^{]*?\})\s*\n*\z/
    @qualifier = $1
    value.gsub!(@qualifier, '')
    @qualifier.strip!
  end

  value.strip!

  # Handle a xref list TODO: Tokenize
  if value =~ /(\[.*\])/i
    xref_list = $1
    value.gsub!(xref_list, '')

    # Drop the enclosing brackets and any padding inside them. Without the
    # strip, a list such as "[ GO:1 ]" made the \A-anchored removal below miss
    # on the first pass, so the same xref ended up in @xrefs twice.
    xref_list = xref_list[1..-2].strip

    passes = 0 # failsafe against input the patterns below cannot consume
    while xref_list.length > 0
      passes += 1
      raise "#{xref_list} is seemingly infinite" if passes > 500
      xref_list.sub!(/\A\s*,\s*/, '')

      # An xref is "prefix:id", ending at a description quote, a modifier
      # brace or the comma that starts the next entry.
      xref_list =~ /\A(.+?:[^\"\{\,]+)/i
      xref = $1

      unless xref.nil?
        xref.strip!
        xref_list.sub!(/\A#{Regexp.escape(xref)}\s*/, '')
        @xrefs.push(xref)
      end

      xref_list.strip!

      # A description (parsed past, not stored)
      if xref_list =~ /\A(\s*".*?")/i
        xref_list.sub!(/\A#{Regexp.escape($1)}/, '')
        xref_list.strip!
      end

      # A optional modifier (parsed past, not stored)
      if xref_list =~ /\A(\s*\{[^\}]*?\})/
        xref_list.sub!(/\A#{Regexp.escape($1)}/, '')
        xref_list.strip!
      end

      xref_list.strip!
    end
  end

  value.strip!

  # At this point we still might have a '"foo" QUALIFIER' combination
  if value =~ /\A(\"[^\"]*\")\s+(.*)/
    @value = $1.strip
    @qualifier = $2.strip if !$2.nil?
  else
    @value = value.strip
  end

  # Unwrap a quoted value.
  @value = @value[1..-2].strip if @value[0..0] == "\""
  @tag = tag.strip
  @value.strip!
end