class Bio::Newick
Public Instance Methods
__parse_newick_tokenize(str, options)
click to toggle source
Splits a Newick-format string into an array of tokens.
# File lib/iroki/main/main.rb, line 48
#
# Splits a Newick-format string into an array of tokens.
#
# The structural characters ( ) , : [ ] are returned as Symbols; every
# other piece of text (labels, numbers, ...) is returned as a String.
# Whitespace between tokens is discarded.
#
# The tokenizing strategy depends on __get_option(:parser, options):
# * :naive    -- the string is simply split on the structural characters;
#                quotes get no special handling and underscores are kept.
# * :iroki    -- quote-aware scanning; underscores in unquoted labels are
#                kept as they are.
# * otherwise -- quote-aware scanning; underscores in unquoted labels are
#                converted to blanks.
#
# Quoted labels follow
# http://evolution.genetics.washington.edu/phylip/newick_doc.html
#   quoted_label ==> ' string_of_printing_characters '
# where a single quote inside a quoted_label is written as '' (two single
# quotes).
#
# @param str [String] the Newick text; a terminating semicolon (optionally
#   followed by whitespace) is ignored
# @param options passed unchanged to __get_option to look up :parser
# @return [Array<Symbol, String>] the tokens in input order
def __parse_newick_tokenize(str, options)
  # Drop the terminating semicolon. Trailing whitespace after it (e.g. the
  # newline at the end of a file) is tolerated so that ";" never leaks
  # into the token list as a bogus label.
  str = str.sub(/;\s*\z/, '')

  # The parser option does not change while tokenizing; look it up once.
  parser = __get_option(:parser, options)

  if parser == :naive
    pieces = str.split(/([\(\)\,\:\[\]])/).map(&:strip).reject(&:empty?)
    return pieces.map { |x| /\A([\(\)\,\:\[\]])\z/ =~ x ? x.intern : x }
  end

  keep_underscores = (parser == :iroki)

  # Cleans up the raw text of an unquoted label: surrounding whitespace and
  # embedded line breaks are removed and, unless the :iroki parser is in
  # use, unquoted underscores are converted to blanks.
  normalize = lambda do |raw|
    label = raw.strip.delete("\r\n")
    label = label.tr('_', ' ') unless keep_underscores
    label
  end

  tokens = []
  ss = StringScanner.new(str)

  until ss.eos?
    if ss.scan(/\s+/)
      # whitespace between tokens: nothing to emit
    elsif ss.scan(/[\(\)\,\:\[\]]/)
      # '(' or ')' or ',' or ':' or '[' or ']'
      tokens.push ss.matched.intern
    elsif ss.scan(/\'/)
      # quoted_label
      label = ''.dup
      loop do
        # No closing quote at all: incomplete quoted_label, give up here.
        break unless ss.scan(/([^\']*)\'/)
        label << ss[1]
        # A quote not followed by another quote closes the label ...
        break unless ss.scan(/\'/)
        # ... while '' stands for one literal single quote.
        label << ss.matched
      end

      at_boundary = ss.match?(/\s*[\(\)\,\:\[\]]/) || ss.match?(/\s*\z/)
      # Text directly after the closing quote is illegal, but try to rescue
      # it by appending it to the label.
      if !at_boundary && ss.scan(/[^\(\)\,\:\[\]]+/)
        label << ss.matched.lstrip
      end
      tokens.push label
    elsif ss.scan(/[^\(\)\,\:\[\]]+/)
      # unquoted_label
      label = normalize.call(ss.matched)
      tokens.push label unless label.empty?
    else
      # Fallback kept from the original implementation: treat whatever is
      # left as an unquoted_label at the end of the string and stop.
      label = normalize.call(ss.rest)
      tokens.push label unless label.empty?
      ss.terminate
    end
  end

  tokens
end