class Sol::Lexer

Constants

IDENTIFIER
NEWLINE
NUMBER
OPERATOR
STRING
WHITESPACE

Attributes

KEYWORDS[R]

Public Class Methods

new() click to toggle source
# File lib/sol/lexer.rb, line 23
# Sets up the list of reserved words. identifier_token consults this list
# to decide whether an identifier is tagged with its own upper-cased name
# or with the generic :IDENTIFIER type.
def initialize
  @KEYWORDS = %w[func if true false null]
end

Public Instance Methods

repl() click to toggle source
# File lib/sol/lexer.rb, line 52
# Interactive prompt. Reads lines with Readline until end-of-input (nil)
# or the literal command 'quit'. Every other line is pushed onto the
# Readline history and the tokens produced by tokenise are printed.
def repl
  loop do
    line = Readline.readline('> ')
    break unless line && line != 'quit'

    Readline::HISTORY.push(line)

    # to_s prints the whole token array on one line, in inspect form.
    puts tokenise(line).to_s
  end
end
tokenise(input) click to toggle source

This is how to implement a very simple scanner: scan one character at a time until you find something to parse.

# File lib/sol/lexer.rb, line 32
# Turns +input+ into an array of tokens of the form [type, value].
#
# Resets the scanner state, then repeatedly exposes the not-yet-consumed
# tail of the input as @chunk and calls extract_next_token, until the
# position @i reaches the end of the input. The loop relies on
# extract_next_token advancing @i.
#
# Returns the array of collected tokens.
def tokenise(input)
  @input = input.chomp # trailing line break stripped
  @i = 0               # index of the next character to examine
  @tokens = []         # tokens collected so far

  until @i >= @input.length
    @chunk = @input.slice(@i..-1)
    extract_next_token
  end

  @tokens
end

Private Instance Methods

extract_next_token() click to toggle source
# File lib/sol/lexer.rb, line 70
# Tries each specialised matcher in order (identifier, number, string,
# whitespace) and stops at the first one that reports success.
#
# Returns nil when one of them matched; otherwise returns whatever
# literal_token returns.
def extract_next_token
  matched = identifier_token || number_token || string_token || whitespace_token
  return if matched

  literal_token
end
identifier_token() click to toggle source

Matching if, print, method names, etc.

# File lib/sol/lexer.rb, line 86
# Tries to read an identifier (first capture of IDENTIFIER) from @chunk.
#
# Returns false when there is no match. Otherwise appends one token,
# advances @i by the identifier's length and returns the new @i.
def identifier_token
  name = @chunk[IDENTIFIER, 1]
  return false unless name

  # A keyword is tagged with its own upper-cased name, so 'if' becomes
  # [:IF, "if"]; every other identifier gets the generic :IDENTIFIER type.
  type = @KEYWORDS.include?(name) ? name.upcase.to_sym : :IDENTIFIER
  @tokens.push([type, name])

  @i += name.length
end
literal_token() click to toggle source

We treat every other single character as a token, e.g.: ( ) , . !

# File lib/sol/lexer.rb, line 139
# Fallback matcher for whatever the other *_token methods did not accept.
#
# * Text matched by NEWLINE yields a single ["\n", "\n"] token, unless the
#   previous token is already a newline token, in which case none is added.
# * Otherwise the text matched by OPERATOR, or failing that the single
#   next character, becomes a token whose type and value are both that text.
#
# In every case @i is advanced past the consumed text.
# Returns the updated position @i.
def literal_token
  value = @chunk[NEWLINE, 1]

  if value
    @tokens << ["\n", "\n"] unless @tokens.last && @tokens.last[0] == "\n"

    # The position has to be stored, not merely computed: the scan loop in
    # tokenise only terminates if @i actually moves past the newline.
    @i += value.length
    return @i
  end

  value = @chunk[OPERATOR, 1] || @chunk[0, 1]
  @tokens << [value, value]

  @i += value.length
end
number_token() click to toggle source
# File lib/sol/lexer.rb, line 107
# Tries to read a number (first capture of NUMBER) from @chunk.
#
# Returns false when there is no match. Otherwise appends a
# [:NUMBER, Integer] token, advances @i by the length of the matched
# text and returns the new @i.
def number_token
  digits = @chunk[NUMBER, 1]
  return false unless digits

  @tokens.push([:NUMBER, digits.to_i])
  @i += digits.length
end
string_token() click to toggle source
# File lib/sol/lexer.rb, line 117
# Tries to read a string literal (first capture of STRING) from @chunk.
#
# Returns false when there is no match. Otherwise appends a
# [:STRING, text] token and advances @i by the captured text's length
# plus 2, then returns the new @i.
def string_token
  text = @chunk[STRING, 1]
  return false unless text

  @tokens.push([:STRING, text])

  # presumably the extra 2 skips the two delimiters that surround the
  # capture — confirm against the STRING pattern
  @i += 2 + text.length
end
whitespace_token() click to toggle source

Ignore whitespace

# File lib/sol/lexer.rb, line 129
# Skips whitespace (first capture of WHITESPACE) at the start of @chunk
# without emitting a token.
#
# Returns false when there is no match, otherwise the advanced @i.
def whitespace_token
  blanks = @chunk[WHITESPACE, 1]
  return false unless blanks

  @i += blanks.length
end