class StringParser

Attributes

data[RW]
elist[RW]
id[RW]
level[RW]
pos[RW]
tncnt[RW]

Public Class Methods

new(str) click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 36
# Sets up a parser for the bracket-notation string +str+.
#
# The text is normalised before it is stored in @data: tabs are removed,
# every run of whitespace collapses to a single space, and the space in
# front of an opening bracket (including the one inside "] [") is dropped.
# If removing the tabs raises a StandardError (for example when +str+ is
# nil), an empty string is used instead.
def initialize(str)
  without_tabs =
    begin
      str.gsub(/\t/, "")
    rescue StandardError
      ""
    end

  # Normalised input, kept for the tokenizer and for valid?
  @data = without_tabs.gsub(/\s+/, " ").gsub(/\] \[/, "][").gsub(/ \[/, "[")

  @elist = ElementList.new # internal element list
  @pos   = 0               # current position in @data
  @id    = 1               # id handed to the next element
  @level = 0               # current level in the diagram
  @tncnt = Hash.new        # node name => number of occurrences
end

Public Instance Methods

auto_subscript() click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 101
# Appends a running "_<n>" suffix to the content of every element whose type
# equals ETYPE_NODE and whose content is recorded more than once in @tncnt.
# The suffix number counts occurrences of that content in the order the
# elements are returned by @elist.get_elements.
#
# Returns @tncnt.
def auto_subscript
  seen = Hash.new(0)

  @elist.get_elements.each do |element|
    next unless element.type == ETYPE_NODE

    label = element.content
    total = @tncnt[label] || 1 # names missing from @tncnt count as a single occurrence
    next unless total > 1

    seen[label] += 1
    element.content += "_#{seen[label]}"
  end

  @tncnt
end
count_node(name) click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 128
# Increments the occurrence counter kept in @tncnt for +name+, using the
# name with surrounding whitespace stripped as the key.
#
# Returns the updated count for that key.
def count_node(name)
  key = name.strip
  @tncnt[key] = (@tncnt[key] || 0) + 1
end
get_elementlist() click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 97
# Returns the element list object held in @elist.
def get_elementlist
  @elist
end
get_next_token() click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 137
# Reads the next token from @data, starting at @pos, and advances @pos.
#
# A token is one of:
# * "[" followed by the text up to (not including) the next unescaped bracket,
# * a lone "]" when the scan starts on an unescaped closing bracket,
# * plain text up to (not including) the next unescaped bracket.
#
# A backslash escapes the following character and is not copied into the
# token itself; "\n" and "\r" are skipped. Returns "" once @pos has reached
# the last character of @data.
def get_next_token
  chars = @data.split(//)
  return "" if @pos + 1 >= chars.length

  token    = ""
  escaped  = false
  finished = false
  offset   = 0

  until finished || @pos + offset >= chars.length
    ch = chars[@pos + offset]

    if ch == "[" || ch == "]"
      if escaped
        # An escaped bracket is ordinary text.
        token += ch
        escaped = false
      elsif ch == "]"
        # A closing bracket is a token of its own only at the scan start.
        token += ch if offset == 0
        finished = true
      elsif offset > 0
        # An opening bracket after some text ends the current token.
        finished = true
      else
        token += ch
      end
    elsif ch == "\\"
      escaped = true
    elsif ch !~ /[\n\r]/
      token += ch
      escaped = false
    end

    offset += 1
  end

  # Leave @pos on the character that ended the token, always moving
  # forward by at least one.
  @pos += (offset > 1 ? offset - 1 : 1)
  token
end
make_tree(parent) click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 191
# Consumes tokens from get_next_token and adds the matching elements to
# @elist as children of the element id +parent+. It recurses for every
# opening bracket and stops at a "]" token or an empty token.
#
# * "[Label" creates one element and descends into it.
# * "[Label text" creates an element for the label plus a child element
#   (one level deeper) for the remaining text, then descends into the
#   label element.
# * Any other non-blank token becomes an element at the current level.
#
# "<>" inside a bracket token is replaced by a space. Label names and plain
# tokens are reported to count_node. @level is decremented on the way out;
# the decremented value is returned.
def make_tree(parent)
  token = get_next_token.strip

  until token == "" || token == "]"
    if token.start_with?("[")
      body     = token[1..-1]
      split_at = body.index(" ")

      if split_at
        label = body[0, split_at].gsub("<>", " ")
        text  = body[split_at..-1].gsub("<>", " ")

        node = Element.new(@id, parent, label, @level)
        @id += 1
        @elist.add(node)
        subtree_parent = node.id
        count_node(label)

        # The trailing text hangs below the node that was just created.
        child = Element.new(@id, @id - 1, text, @level + 1)
        @id += 1
        @elist.add(child)
      else
        label = body.gsub("<>", " ")
        node = Element.new(@id, parent, label, @level)
        @id += 1
        subtree_parent = node.id
        @elist.add(node)
        count_node(label)
      end

      @level += 1
      make_tree(subtree_parent)
    elsif token.strip != ""
      leaf = Element.new(@id, parent, token, @level)
      @id += 1
      @elist.add(leaf)
      count_node(token)
    end

    token = get_next_token
  end

  @level -= 1
end
parse() click to toggle source
# File lib/rsyntaxtree/string_parser.rb, line 93
# Builds the element tree for the stored string by calling make_tree with
# parent id 0, and returns whatever make_tree returns.
def parse
  make_tree(0)
end
valid?() click to toggle source

Caution: this is a quick and dirty solution.

# File lib/rsyntaxtree/string_parser.rb, line 52
# Performs a rough sanity check on @data and returns true or false.
#
# The string is rejected when it is empty, contains an empty bracket pair
# ("[" followed only by whitespace and "]"), contains "[_", or when the
# numbers of unescaped "[" and "]" differ. Scanning stops as soon as more
# closing than opening brackets have been seen. A backslash escapes the
# character that follows it.
def valid?
  return false if @data.length < 1
  return false if /\[\s*\]/m =~ @data
  return false if /\[\_/ =~ @data

  opened  = 0
  closed  = 0
  escaped = false

  @data.strip.split(//).each do |chr|
    if chr == "\\"
      escaped = true
    elsif escaped
      # Whatever follows a backslash is ignored for bracket counting.
      escaped = false
    elsif chr == "["
      opened += 1
    elsif chr == "]"
      closed += 1
      break if opened < closed
    end
  end

  opened == closed
end