class EleetScript::Lexer

Constants

KEYWORDS
RESERVED_WORDS
TOKEN_RX

Public Instance Methods

tokenize(code) click to toggle source
# File lib/lang/lexer.rb, line 34
def tokenize(code)
  tokens = []
  if code.length == 0
    return tokens
  end
  line = 1
  i = 0
  while i < code.length
    chunk = code[i..-1]
    if (operator = chunk[TOKEN_RX[:operator]])
      tokens << [operator, operator]
      i += operator.length
    elsif (constant = chunk[TOKEN_RX[:constants]])
      tokens << [:CONSTANT, constant]
      i += constant.length
    elsif (global = chunk[TOKEN_RX[:globals]])
      tokens << [:GLOBAL, $1]
      i += global.length
    elsif (class_var = chunk[TOKEN_RX[:class_var]])
      tokens << [:CLASS_IDENTIFIER, $1]
      i += class_var.length
    elsif (instance_var = chunk[TOKEN_RX[:instance_var]])
      tokens << [:INSTANCE_IDENTIFIER, $1]
      i += instance_var.length
    elsif (symbol = chunk[TOKEN_RX[:symbol]])
      tokens << [:SYMBOL, $1.to_sym]
      i += symbol.length
    elsif (regex = chunk[TOKEN_RX[:regex]])
      pattern, flags = $1, $2
      tokens << [:REGEX, pattern.gsub('\"', '"')]
      if flags && flags.length > 0
        tokens << [:REGEX_FLAGS, flags]
      end
      i += regex.length
    elsif (identifier = chunk[TOKEN_RX[:identifiers]])
      if KEYWORDS.include?(identifier)
        tokens << [identifier.upcase.gsub(/\?\!/, '').to_sym, identifier]
      else
        tokens << [:IDENTIFIER, identifier]
      end
      i += identifier.length
    elsif (float = chunk[TOKEN_RX[:float]])
      tokens << [:FLOAT, float.to_f]
      i += float.length
    elsif (integer = chunk[TOKEN_RX[:integer]])
      tokens << [:NUMBER, integer.to_i]
      i += integer.length
    elsif (string = chunk[TOKEN_RX[:string]])
      tokens << [:STRING, $1.gsub('\"', '"')]
      i += string.length
    elsif (comment = chunk[TOKEN_RX[:comment]])
      i += comment.length # Ignore comments
    elsif (terminator = chunk[TOKEN_RX[:terminator]])
      tokens << [:TERMINATOR, terminator]
      line += 1 if terminator == "\n"
      i += 1
    elsif (space = chunk[TOKEN_RX[:whitespace]])
      i += space.length # ignore spaces and tab characters
    else
      raise LexicalError.new(code[i], line)
    end
  end
  if tokens.length.positive? && tokens.last != [:TERMINATOR, "\n"]
    tokens << [:TERMINATOR, "\n"]
  end
  tokens << [:EOF, :eof] if tokens.length.positive?
  tokens
end