class NScript::Lexer

Constants

ASSIGNMENT
CALLABLE
CODE
COMMENT
COMMENT_CLEANER
HEREDOC
HEREDOC_INDENT
IDENTIFIER
JS
JS_CLEANER
KEYWORDS
LAST_DENT
MULTILINER
MULTI_DENT
NOT_REGEX
NO_NEWLINE
NUMBER
OPERATOR
REGEX
STRING
STRING_NEWLINES
WHITESPACE

Public Instance Methods

close_indentation() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 224
# Emits the outdents needed to unwind the whole current indentation
# (@indent) by delegating to outdent_token. Returns whatever
# outdent_token returns.
def close_indentation
  open_width = @indent
  outdent_token(open_width)
end
comment_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 133
# Tries to match COMMENT against the current chunk.
#
# On a match: bumps @line by the number of MULTILINER hits in the matched
# text, emits a :COMMENT token whose value is the array of cleaned comment
# lines (COMMENT_CLEANER stripped, split on MULTILINER), emits a "\n"
# token, and advances @i past the matched text.
#
# Returns false when nothing matched, otherwise the updated @i.
def comment_token
  text = @chunk[COMMENT, 1]
  return false unless text
  # @line is updated first so the emitted tokens carry the new line number.
  @line += text.scan(MULTILINER).size
  cleaned_lines = text.gsub(COMMENT_CLEANER, '').split(MULTILINER)
  token(:COMMENT, cleaned_lines)
  token("\n", "\n")
  @i += text.length
end
extract_next_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 62
# Runs the token matchers in priority order and stops at the first one
# that reports success (a truthy return). If none of them succeeds, falls
# back to literal_token.
#
# Returns nil when one of the ordered matchers succeeded, otherwise the
# return value of literal_token.
def extract_next_token
  ordered_matchers = [
    :identifier_token, :number_token, :heredoc_token, :string_token,
    :js_token, :regex_token, :indent_token, :comment_token,
    :whitespace_token
  ]
  # any? short-circuits, so later matchers are not run after a success.
  return if ordered_matchers.any? { |matcher| send(matcher) }
  literal_token
end
heredoc_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 108
# Tries to match HEREDOC against the current chunk and emit it as a
# double-quoted :STRING token.
#
# The heredoc body is taken from capture group 2 or 4. The smallest
# HEREDOC_INDENT match found in the body is stripped from the start of
# each line, newlines are rewritten as the two characters \n, and double
# quotes are backslash-escaped. @line and @i are then advanced by the
# newline count and length of the full match (group 1).
#
# Returns false when nothing matched, otherwise the updated @i.
def heredoc_token
  return false unless match = @chunk.match(HEREDOC)
  # Neither body group participates when the heredoc has no body at all,
  # so fall back to an empty string instead of calling methods on nil.
  doc = (match[2] || match[4]).to_s
  indent = doc.scan(HEREDOC_INDENT).min
  # Non-mutating gsub: nil.to_s may be a frozen string on newer Rubies.
  doc = doc.gsub(/^#{indent}/, "").gsub("\n", "\\n").gsub('"', '\\"')
  token(:STRING, "\"#{doc}\"")
  @line += match[1].count("\n")
  @i += match[1].length
end
identifier_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 75
# Tries to match IDENTIFIER against the current chunk and emit a token
# for it.
#
# Tagging rules:
# * A name listed in KEYWORDS is tagged with its own upcased symbol
#   ('if' becomes [:IF, "if"]); anything else is :IDENTIFIER.
# * :WHEN directly after an :OUTDENT, :INDENT or "\n" token becomes
#   :LEADING_WHEN.
# * An identifier after '::' retags that previous token as
#   :PROTOTYPE_ACCESS.
# * An identifier after a single '.' (the token before it is not also a
#   '.') retags the dot as :PROPERTY_ACCESS, or as :SOAK_ACCESS when the
#   token before the dot is tagged "?", in which case that "?" token is
#   removed.
#
# Returns false when nothing matched, otherwise the updated @i.
def identifier_token
  return false unless identifier = @chunk[IDENTIFIER, 1]
  tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
  tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
  @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
  before_dot = @tokens[-2]
  if tag == :IDENTIFIER && last_value == '.' && !(before_dot && before_dot[1] == '.')
    # before_dot is nil when the dot is the very first token; treat that
    # as a plain property access rather than dereferencing nil.
    if before_dot && before_dot[0] == "?"
      @tokens[-1][0] = :SOAK_ACCESS
      @tokens.delete_at(-2)
    else
      @tokens[-1][0] = :PROPERTY_ACCESS
    end
  end
  token(tag, identifier)
  @i += identifier.length
end
indent_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 141
# Tries to match MULTI_DENT (a run of line breaks with indentation)
# against the current chunk and turn it into newline / indent / outdent
# tokens.
#
# @line and @i are advanced past the matched text first. The line break
# is then suppressed (see suppress_newlines) when either
# * capture group 4 of MULTI_DENT is '.', or
# * the previous token's value matches NO_NEWLINE, the token before it is
#   not tagged '.', and the previous value does not match CODE.
#
# Otherwise the new indentation width is the length of the last LAST_DENT
# capture in the matched text: equal to @indent emits a newline, larger
# emits :INDENT and pushes the delta onto @indents, smaller delegates to
# outdent_token. @indent is updated in the last two cases.
#
# Returns false when nothing matched; otherwise the result of the helper
# that handled the line break, or the new @indent.
def indent_token
  return false unless indent = @chunk[MULTI_DENT, 1]
  @line += indent.scan(MULTILINER).size
  @i += indent.size
  next_character = @chunk[MULTI_DENT, 4]
  # @tokens[-2] is nil when fewer than two tokens exist; in that case
  # there is no preceding '.', so guard instead of dereferencing nil.
  before_last = @tokens[-2]
  after_dot = before_last && before_last[0] == '.'
  no_newlines = next_character == '.' ||
                (last_value.to_s.match(NO_NEWLINE) && !after_dot && !last_value.match(CODE))
  return suppress_newlines(indent) if no_newlines
  size = indent.scan(LAST_DENT).last.last.length
  return newline_token(indent) if size == @indent
  if size > @indent
    token(:INDENT, size - @indent)
    @indents << (size - @indent)
  else
    outdent_token(@indent - size)
  end
  @indent = size
end
js_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 120
# Tries to match JS against the current chunk. On a match, emits a :JS
# token whose value is the matched text with JS_CLEANER matches removed,
# and advances @i by the length of the raw match.
#
# Returns false when nothing matched, otherwise the updated @i.
def js_token
  raw = @chunk[JS, 1]
  return false unless raw
  cleaned = raw.gsub(JS_CLEANER, '')
  token(:JS, cleaned)
  @i += raw.length
end
last_tag() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 205
# Returns the tag (element 0) of the most recently added token, or nil
# when no tokens exist yet.
def last_tag
  newest = @tokens.last
  newest && newest[0]
end
last_value() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 201
# Returns the value (element 1) of the most recently added token, or nil
# when no tokens exist yet.
def last_value
  newest = @tokens.last
  newest && newest[1]
end
literal_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 184
# Fallback matcher: emits a token for an operator or, failing that, for
# the single next character of the chunk.
#
# * When OPERATOR matches and the match also matches CODE, tag_parameters
#   is run first so earlier tokens can be retagged.
# * A value matching ASSIGNMENT is tagged :ASSIGN; otherwise the value is
#   its own tag.
# * When the previous token's value is not the very object remembered in
#   @spaced and the previous tag is listed in CALLABLE, '(' is tagged
#   :CALL_START and '[' is tagged :INDEX_START.
#
# Returns the updated @i.
def literal_token
  operator = @chunk[OPERATOR, 1]
  if operator
    # Must happen before the CALLABLE check below: it may retag tokens.
    tag_parameters if operator.match(CODE)
    value = operator
  else
    value = @chunk[0, 1]
  end

  tag = value.match(ASSIGNMENT) ? :ASSIGN : value

  follows_callable = !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
  if follows_callable
    case value
    when '(' then tag = :CALL_START
    when '[' then tag = :INDEX_START
    end
  end

  token(tag, value)
  @i += value.length
end
newline_token(newlines) click to toggle source
# File lib/nscript/lexer/lexer.rb, line 174
# Emits a "\n" token unless the previous token's value is already "\n",
# so consecutive line breaks collapse into one token.
#
# newlines - the matched line-break text; not used by this method.
#
# Always returns true.
def newline_token(newlines)
  already_broken = last_value == "\n"
  token("\n", "\n") unless already_broken
  true
end
number_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 94
# Tries to match NUMBER against the current chunk. On a match, emits a
# :NUMBER token carrying the matched text and advances @i past it.
#
# Returns false when nothing matched, otherwise the updated @i.
def number_token
  digits = @chunk[NUMBER, 1]
  return false unless digits
  token(:NUMBER, digits)
  @i += digits.length
end
outdent_token(move_out) click to toggle source
# File lib/nscript/lexer/lexer.rb, line 159
# Pops indentation levels off @indents, emitting one :OUTDENT token per
# popped level, until at least move_out columns have been closed or the
# stack is empty. A "\n" token is always emitted afterwards.
#
# move_out - number of columns of indentation to close.
#
# Returns the result of the final token call.
def outdent_token(move_out)
  remaining = move_out
  until remaining <= 0 || @indents.empty?
    width = @indents.pop
    token(:OUTDENT, width)
    remaining -= width
  end
  token("\n", "\n")
end
regex_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 126
# Tries to match REGEX against the current chunk. The match is rejected
# when the previous token's tag is listed in NOT_REGEX. Otherwise a
# :REGEX token carrying the matched text is emitted and @i is advanced
# past it.
#
# Returns false when nothing matched or the match was rejected, otherwise
# the updated @i.
def regex_token
  literal = @chunk[REGEX, 1]
  return false if literal.nil? || NOT_REGEX.include?(last_tag)
  token(:REGEX, literal)
  @i += literal.length
end
string_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 100
# Tries to match STRING against the current chunk. On a match, emits a
# :STRING token whose value is the matched text with every
# STRING_NEWLINES match replaced by a space, a backslash and a newline;
# then advances @line by the number of newlines in the raw match and @i
# by its length.
#
# Returns false when nothing matched, otherwise the updated @i.
def string_token
  literal = @chunk[STRING, 1]
  return false unless literal
  token(:STRING, literal.gsub(STRING_NEWLINES, " \\\n"))
  @line += literal.count("\n")
  @i += literal.length
end
suppress_newlines(newlines) click to toggle source
# File lib/nscript/lexer/lexer.rb, line 179
# Swallows a line break without emitting a newline token. If the previous
# token's value is a single backslash, that token is removed from the
# stream as well.
#
# newlines - the matched line-break text; not used by this method.
#
# Always returns true.
def suppress_newlines(newlines)
  trailing_backslash = last_value == "\\"
  @tokens.pop if trailing_backslash
  true
end
tag_parameters() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 209
# Retags the tokens of a parenthesised list that immediately precedes the
# current position as parameters. Does nothing unless the previous token
# is tagged ')'.
#
# Walking backwards from the newest token: every :IDENTIFIER becomes
# :PARAM, every ')' becomes :PARAM_END, and the walk stops at the first
# '(' which becomes :PARAM_START. Other tokens are left untouched.
#
# Returns :PARAM_START when an opening '(' was found, otherwise nil.
def tag_parameters
  return unless last_tag == ')'
  @tokens.reverse_each do |tok|
    case tok[0]
    when :IDENTIFIER then tok[0] = :PARAM
    when ')'         then tok[0] = :PARAM_END
    when '('
      tok[0] = :PARAM_START
      return :PARAM_START
    end
  end
  # Ran off the start of the stream without meeting an opening '('.
  nil
end
token(tag, value) click to toggle source
# File lib/nscript/lexer/lexer.rb, line 197
# Appends a [tag, value] pair to the token stream, wrapping the value in
# a Value built from it and the current @line.
#
# Returns @tokens.
def token(tag, value)
  wrapped = Value.new(value, @line)
  @tokens.push([tag, wrapped])
end
tokenize(code) click to toggle source
# File lib/nscript/lexer/lexer.rb, line 45
# Lexes +code+ into a token stream.
#
# Resets all lexer state, then repeatedly hands the not-yet-consumed tail
# of the input (@chunk) to extract_next_token until @i reaches the end of
# the input. Afterwards any open indentation is closed and the tokens are
# passed through Rewriter#rewrite.
#
# When the VERBOSE environment variable is set, the raw token stream is
# printed before indentation is closed.
#
# code - the source text; one trailing line break is dropped via chomp.
#
# Returns the result of Rewriter#rewrite.
def tokenize(code)
  @code = code.chomp
  # Cursor state: character offset, current line, current indent width.
  @i, @line, @indent = 0, 1, 0
  @indents = []   # stack of indent widths currently open
  @tokens  = []   # parsed tokens, each [tag, value]
  @spaced  = nil  # last token value that was followed by whitespace

  until @i >= @code.length
    @chunk = @code[@i..-1]
    extract_next_token
  end

  puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
  close_indentation
  Rewriter.new.rewrite(@tokens)
end
whitespace_token() click to toggle source
# File lib/nscript/lexer/lexer.rb, line 168
# Tries to match WHITESPACE against the current chunk. No token is
# emitted; instead the previous token's value is remembered in @spaced
# (marking it as followed by whitespace) and @i is advanced past the
# matched text.
#
# Returns false when nothing matched, otherwise the updated @i.
def whitespace_token
  blank = @chunk[WHITESPACE, 1]
  return false unless blank
  @spaced = last_value
  @i += blank.length
end