class Riml::Lexer
Constants
- ANCHORED_INTERPOLATION_REGEX
- END_KEYWORDS_SYMBOLS
- INTERPOLATION_REGEX
- INTERPOLATION_SPLIT_REGEX
- OPERATOR_REGEX
- SINGLE_LINE_COMMENT_REGEX
- STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX
The string constructor has to be used here, because the parser would throw a
SyntaxError
if RUBY_VERSION < '1.9'. The literal regexp is `/\A"(.*?)(?<!\\)"/`.
Attributes
current_indent[R]
filename[R]
ignore_indentation_check[RW]
for REPL
lineno[RW]
parser_info[R]
prev_token[R]
tokens[R]
Public Class Methods
new(code, filename = nil, parser_info = false)
click to toggle source
# File lib/riml/lexer.rb, line 23
# Builds a lexer over +code+.
#
# code        - String of source text. A single trailing line terminator is
#               dropped before scanning; the caller's string is left
#               untouched, so frozen strings are accepted.
# filename    - name stored in @filename; falls back to
#               COMPILED_STRING_LOCATION when nil.
# parser_info - when truthy, next_token appends a { :lineno, :filename }
#               hash to every token it stores in @tokens.
def initialize(code, filename = nil, parser_info = false)
  # Non-destructive chomp: `chomp!` would modify the caller's argument and
  # raise on a frozen string.
  @s = StringScanner.new(code.chomp)
  @filename = filename || COMPILED_STRING_LOCATION
  @parser_info = parser_info

  # array of doubles and triples: [tokenname, tokenval, lineno_to_add(optional)]
  # ex: [[:NEWLINE, "\n"]] OR [[:NEWLINE, "\n", 1]]
  @token_buf = []

  # array of doubles OR triples, depending if `@parser_info` is set to true
  # doubles: [tokenname, tokenval]
  # ex: [[:NEWLINE, "\n"], ...]
  # triples: [tokenname, tokenval, parser_info]
  # ex: [[:NEWLINE, "\n", { :lineno => 1, :filename => 'main.riml' }], ...]
  @tokens = []

  @prev_token = nil
  @lineno = 1
  @current_indent = 0
  @indent_pending = false
  @dedent_pending = false
  @in_function_declaration = false
end
Public Instance Methods
next_token()
click to toggle source
TODO: fix this slow method
# File lib/riml/lexer.rb, line 51
# Returns the next token, or nil once the input is exhausted.
#
# Chunks are lexed until at least one token is buffered or the scanner hits
# the end of input. A buffered token with a third element carries a line
# count, which is popped off and added to @lineno. The token is recorded in
# @tokens (decorated when @parser_info is set) and remembered, as a
# [name, value] pair, in @prev_token.
#
# When nothing is left, the indentation balance is verified (unless
# ignore_indentation_check is set) and nil is returned.
def next_token
  tokenize_chunk while @token_buf.empty? && !@s.eos?

  if @token_buf.empty?
    check_indentation unless ignore_indentation_check
    return nil
  end

  token = @token_buf.shift
  @lineno += token.pop if token.size == 3

  unless @parser_info
    @tokens << token
    @prev_token = token
    return token
  end

  @tokens << decorate_token(token)
  @prev_token = token.first(2)
  token
end
prev_token_is_keyword?(n = 2)
click to toggle source
Checks whether any of the previous n tokens are keywords. Returns the first keyword found (most recent token first), otherwise returns `false`.
# File lib/riml/lexer.rb, line 236
# Looks at the last +n+ entries of +tokens+, most recent first, and returns
# the value of the first one whose value is listed in KEYWORDS.
# Returns false when none qualifies or when n is not positive.
def prev_token_is_keyword?(n = 2)
  return false unless n > 0

  tokens.last(n).reverse_each do |tok|
    value = tok[1]
    return value if value && KEYWORDS.include?(value)
  end
  false
end
tokenize()
click to toggle source
# File lib/riml/lexer.rb, line 45
# Drains the lexer by calling next_token until it returns nil, then returns
# the accumulated @tokens array.
def tokenize
  nil until next_token.nil?
  @tokens
end
tokenize_chunk()
click to toggle source
# File lib/riml/lexer.rb, line 73
# Lexes one chunk of input at the scanner's current position, advancing the
# scanner and appending zero or more tokens to @token_buf. Line
# continuations, comments and plain whitespace are consumed without
# emitting a token.
#
# The branches below are tried in order and the first one that matches
# wins, so their relative order is significant.
def tokenize_chunk
  # deal with line continuations
  if cont = @s.scan(/\A\r?\n*[ \t\f]*\\/m)
    # count the lines spanned by the continuation, minus the current one
    @lineno += cont.each_line.to_a.size - 1
    return
  end

  # all lines that start with ':' pass right through unmodified
  if (prev_token.nil? || prev_token[0] == :NEWLINE) && @s.scan(/\A[ \t\f]*:(.*)?$/)
    @token_buf << [:EX_LITERAL, @s[1]]
    return
  end

  # `a:<digits>` is emitted as the 'a:' scope modifier plus the digits as
  # an identifier
  if splat_var = @s.scan(/\Aa:\d+/)
    @token_buf << [:SCOPE_MODIFIER, 'a:'] << [:IDENTIFIER, splat_var[2..-1]]
  # the 'n' scope modifier is added by riml
  elsif @s.check(/\A([bwtglsavn]:)(\w|\{)/)
    @token_buf << [:SCOPE_MODIFIER, @s[1]]
    # `check` did not advance; consume only the two-character modifier
    @s.pos += 2
  elsif scope_modifier_literal = @s.scan(/\A([bwtglsavn]:)/)
    @token_buf << [:SCOPE_MODIFIER_LITERAL, scope_modifier_literal]
  # '&', '&x:', '$' or '@' prefix; skipped when the text looks like '&&'
  # (optionally with a scope letter in between)
  elsif special_var_prefix = (!@s.check(/\A&(\w:)?&/) && @s.scan(/\A(&(\w:)?|\$|@)/))
    @token_buf << [:SPECIAL_VAR_PREFIX, special_var_prefix.strip]
    if special_var_prefix == '@'
      next_char = @s.peek(1)
      if REGISTERS.include?(next_char)
        @token_buf << [:IDENTIFIER, next_char]
        @s.getch
      end
    else
      # NOTE(review): this flag is set here but never read in this method —
      # confirm it is still used elsewhere
      @expecting_identifier = true
    end
  # `function(` is emitted as a plain identifier; step back one character
  # so the '(' is lexed on the next call
  elsif @s.scan(/\A(function)\(/)
    @token_buf << [:IDENTIFIER, @s[1]]
    @s.pos -= 1
  # `check` does not advance the scanner; the identifier is consumed by the
  # `@s.pos += identifier.size` near the end of this branch
  elsif identifier = @s.check(/\A[a-zA-Z_][\w#]*(\?|!)?/)
    # keyword identifiers
    if KEYWORDS.include?(identifier)
      if identifier.match(/\Afunction/)
        old_identifier = identifier.dup
        identifier.sub!(/function/, "def")
        # `identifier` is now shorter than the source text; advance by the
        # difference so the later `pos += identifier.size` still consumes
        # the whole original keyword
        @s.pos += (old_identifier.size - identifier.size)
      end

      if DEFINE_KEYWORDS.include?(identifier)
        @in_function_declaration = true
      end

      # strip '?' out of token names and replace '!' with '_bang'
      token_name = identifier.sub(/\?\Z/, "").sub(/!\Z/, "_bang").upcase

      track_indent_level(identifier)

      if VIML_END_KEYWORDS.include?(identifier)
        token_name = :END
      end

      @token_buf << [token_name.to_sym, identifier]

    # builtin command, unless the last character of the peeked text
    # (identifier plus one more character) is '('
    elsif BUILTIN_COMMANDS.include?(identifier) && @s.peek(identifier.size + 1)[-1, 1] != '('
      @token_buf << [:BUILTIN_COMMAND, identifier]
    elsif RIML_FILE_COMMANDS.include? identifier
      @token_buf << [:RIML_FILE_COMMAND, identifier]
    elsif RIML_CLASS_COMMANDS.include? identifier
      @token_buf << [:RIML_CLASS_COMMAND, identifier]
    # a VimL command at the start of a statement: the command and the rest
    # of its line are emitted verbatim as one EX_LITERAL
    elsif VIML_COMMANDS.include?(identifier) && (prev_token.nil? || prev_token[0] == :NEWLINE)
      @s.pos += identifier.size
      until_eol = @s.scan(/.*$/).to_s
      @token_buf << [:EX_LITERAL, identifier << until_eol]
      return
    # method names and variable names
    else
      @token_buf << [:IDENTIFIER, identifier]
    end

    @s.pos += identifier.size

    parse_dict_vals!

  elsif @in_function_declaration && (splat_param = @s.scan(/\A(\.{3}|\*[a-zA-Z_]\w*)/))
    @token_buf << [:SPLAT_PARAM, splat_param]
  # splat in calling context. ex: super(*args) or super(*(args + other_args)) or func('hey', *args)
  elsif !@in_function_declaration && prev_token && @s.check(/\A\*(\w+|\()/)
    # only the '*' itself is consumed here
    @token_buf << [:SPLAT_ARG, @s.getch]
  # integer (octal)
  elsif octal = @s.scan(/\A0[0-7]+/)
    @token_buf << [:NUMBER, octal]
  # integer (hex)
  elsif hex = @s.scan(/\A0[xX][0-9a-fA-F]+/)
    @token_buf << [:NUMBER, hex]
  # integer or float (decimal)
  elsif decimal = @s.scan(/\A[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?)?/)
    @token_buf << [:NUMBER, decimal]
  elsif interpolation = @s.scan(ANCHORED_INTERPOLATION_REGEX)
    # "hey there, #{name}" = "hey there, " . name
    # the first and last characters of the match are dropped before splitting
    parts = interpolation[1...-1].split(INTERPOLATION_SPLIT_REGEX)
    handle_interpolation(*parts)
  # a comment is only recognised here at the start of a statement
  elsif (single_line_comment = @s.check(SINGLE_LINE_COMMENT_REGEX)) && (prev_token.nil? || prev_token[0] == :NEWLINE)
    @s.pos += single_line_comment.size
    @s.pos += 1 unless @s.eos? # consume newline
    @lineno += single_line_comment.each_line.to_a.size
  # trailing comment: optional blanks, a '"', then no further '"' up to the
  # end of the line
  elsif inline_comment = @s.scan(/\A[ \t\f]*"[^"]*?$/)
    @lineno += inline_comment.each_line.to_a.size - 1
  elsif (str = lex_string_double)
    @token_buf << [:STRING_D, str]
  # single-quoted string; a doubled '' inside is accepted as content
  elsif @s.scan(/\A'(([^']|'')*)'/)
    @token_buf << [:STRING_S, @s[1]]
  elsif newlines = @s.scan(/\A([\r\n]+)/)
    # push only 1 newline
    @token_buf << [:NEWLINE, "\n"] unless prev_token && prev_token[0] == :NEWLINE

    # pending indents/dedents
    if @indent_pending
      @indent_pending = false
    elsif @dedent_pending
      @dedent_pending = false
    end
    if @in_function_declaration
      @in_function_declaration = false
    end

    # NOTE(review): this adds one per matched character, so a "\r\n" pair
    # advances @lineno by 2 — confirm that is intended
    @lineno += newlines.size
  # heredoc
  elsif @s.scan(%r{\A<<(.+?)\r?\n})
    pattern = @s[1]
    # NOTE(review): if the terminator is never found, `check` returns nil
    # and @s[1] is nil, so the `.size` call below would raise — confirm
    # whether unterminated heredocs are rejected earlier
    @s.check(%r|(.+?\r?\n)(#{Regexp.escape(pattern)})|m)
    heredoc_string = @s[1]
    # skip the heredoc body and its terminator
    @s.pos += (pattern.size + heredoc_string.size)
    heredoc_string.chomp!
    if heredoc_string =~ INTERPOLATION_REGEX || %Q("#{heredoc_string}") =~ INTERPOLATION_REGEX
      parts = heredoc_string.split(INTERPOLATION_SPLIT_REGEX)
      handle_interpolation(*parts)
    else
      @token_buf << [:STRING_D, escape_chars!(heredoc_string)]
    end
    @lineno += heredoc_string.each_line.to_a.size
  # operators of more than 1 char
  elsif operator = @s.scan(OPERATOR_REGEX)
    @token_buf << [operator, operator]
  elsif regexp = @s.scan(%r{\A/.*?[^\\]/})
    @token_buf << [:REGEXP, regexp]
  # whitespaces
  elsif @s.scan(/\A[ \t\f]+/)
  # operators and tokens of single chars, one of: ( ) , . [ ] ! + - = < > /
  else
    value = @s.getch
    # '|' is emitted as a NEWLINE token
    if value == '|'
      @token_buf << [:NEWLINE, "\n"]
    else
      @token_buf << [value, value]
    end
    # if we encounter `funcCall().`, the next character must be treated as
    # a dictionary retrieval operation, not a string concatenation
    # operation.
    # However, if we see `funcCall().l:localVar`, we know it must be a
    # string concatenation operation.
    # NOTE(review): `&&` binds tighter than `||`, so the peek checks apply
    # only to ')' and a ']' always reaches parse_dict_vals!. Also,
    # `@s.peek(3)` is a string of up to three characters and is compared to
    # the single character ':' — confirm both are intended.
    if value == ']' || value == ')' && (@s.peek(1) == '.' && @s.peek(3) != ':')
      parse_dict_vals!
    end
  end
end
Private Instance Methods
check_indentation()
click to toggle source
# File lib/riml/lexer.rb, line 318
# Verifies that block openers and END keywords balanced out.
#
# @current_indent moves in steps of 2, so half of it is the number of
# unmatched blocks. Raises Riml::SyntaxError (with @filename and @lineno)
# when the balance is non-zero; returns nil otherwise.
def check_indentation
  return if @current_indent == 0

  unmatched = @current_indent / 2
  error_msg =
    if @current_indent > 0
      "Missing #{unmatched} END identifier(s)"
    else
      "#{unmatched.abs} too many END identifiers"
    end
  raise Riml::SyntaxError.new(error_msg, @filename, @lineno)
end
decorate_token(token)
click to toggle source
# File lib/riml/lexer.rb, line 278
# Appends a parser-info hash (current line number and filename) to +token+
# in place and returns the same token array.
def decorate_token(token)
  parser_info = { :lineno => @lineno, :filename => @filename }
  token.push(parser_info)
end
escape_chars!(string)
click to toggle source
# File lib/riml/lexer.rb, line 343
# Escapes +string+ in place: every double quote gets a backslash in front
# of it and every newline becomes the two characters backslash + 'n'.
# Returns the same string object.
def escape_chars!(string)
  [[/"/, '\"'], [/\n/, "\\n"]].each do |pattern, replacement|
    string.gsub!(pattern, replacement)
  end
  string
end
handle_interpolation(*parts)
click to toggle source
# File lib/riml/lexer.rb, line 330
# Turns the pieces of an interpolated string into tokens on @token_buf.
#
# Empty pieces are discarded. A piece of the form #{...} is lexed as code
# by a nested lexer; any other piece becomes an escaped STRING_D token.
# Consecutive pieces are joined with a '.' (string concatenation) token.
def handle_interpolation(*parts)
  parts.reject! { |piece| piece.empty? }
  final_index = parts.size - 1

  parts.each_with_index do |piece, index|
    if piece[0..1] == '#{' && piece[-1, 1] == '}'
      @token_buf.concat tokenize_without_moving_pos(piece[2...-1])
    else
      @token_buf << [:STRING_D, escape_chars!(piece)]
    end
    # string-concatenate all the parts unless this is the last part
    @token_buf << ['.', '.'] if index < final_index
  end
end
lex_string_double()
click to toggle source
# File lib/riml/lexer.rb, line 254
# Tries to consume a double-quoted string at the scanner position.
# Returns the first capture group of the match, or nil when nothing matched.
def lex_string_double
  @s[1] if @s.scan(STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX)
end
parse_dict_vals!()
click to toggle source
Handles `dict.key` or `dict.key.other_key`, etc.
# File lib/riml/lexer.rb, line 304
# Consumes a `.key` / `.key.other_key` chain at the scanner position.
#
# Inside a function declaration the chain is appended to the value of the
# most recent buffered token; otherwise each dot-separated key is emitted
# as its own DICT_VAL token. Does nothing when no chain is present.
def parse_dict_vals!
  return unless @s.scan(/\A\.([\w.]+)(?!:)/)

  chain = @s[1]
  return @token_buf.last[1] << ".#{chain}" if @in_function_declaration

  chain.split('.').each { |key| @token_buf << [:DICT_VAL, key] }
  nil
end
statement_modifier?()
click to toggle source
# File lib/riml/lexer.rb, line 355
# Reports whether the `if`/`unless` keyword at the scanner's current
# position is a trailing statement modifier (`echo "hi" if x`) rather than
# the opener of a block.
#
# The keyword counts as a modifier when non-blank text precedes it on its
# own line and at least one character follows it on that line. The decision
# is based on the text before the scanner position instead of searching the
# whole line for the first "if"/"unless" substring, which misclassified
# lines such as `if notify()` (the "if" inside "notify" was taken as the
# keyword) and `  iffy = 1 if y`.
#
# Expects the scanner to sit on the keyword being classified. The scanner
# position is always restored before returning. Returns true or false.
def statement_modifier?
  keyword_pos = @s.pos
  # backtrack until the beginning of the line
  @s.pos -= 1 until @s.bol?
  # everything on this line that comes before the keyword
  leading = @s.peek(keyword_pos - @s.pos)
  @s.pos = keyword_pos
  return false if leading.strip.empty?

  !@s.check(/\A(if|unless).+?$/).nil?
ensure
  @s.pos = keyword_pos
end
tokenize_without_moving_pos(code)
click to toggle source
# File lib/riml/lexer.rb, line 349
# Lexes +code+ with a separate Lexer instance, so this lexer's scanner is
# not touched. The nested lexer shares this lexer's filename, starts at the
# current line number and is created with parser_info disabled.
# Returns the nested lexer's token array.
def tokenize_without_moving_pos(code)
  nested = Lexer.new(code, filename, false)
  nested.lineno = @lineno
  nested.tokenize
end
track_indent_level(identifier)
click to toggle source
# File lib/riml/lexer.rb, line 287
# Updates the indentation bookkeeping for a keyword.
#
# Block openers add 2 to @current_indent and set @indent_pending; `if` and
# `unless` do so only when they are not statement modifiers. Keywords
# matching END_KEYWORDS_SYMBOLS subtract 2 and set @dedent_pending. Any
# other identifier leaves the state untouched.
def track_indent_level(identifier)
  keyword = identifier.to_sym
  openers = [:def, :def!, :defm, :defm!, :while, :until, :for, :try, :class]

  if openers.include?(keyword)
    @current_indent += 2
    @indent_pending = true
  elsif keyword == :if || keyword == :unless
    unless statement_modifier?
      @current_indent += 2
      @indent_pending = true
    end
  elsif END_KEYWORDS_SYMBOLS.any? { |closer| closer === keyword }
    @current_indent -= 2
    @dedent_pending = true
  end
end