class Nasl::Tokenizer
Public Class Methods
new(code, path)
click to toggle source
# File lib/nasl/tokenizer.rb, line 160
# Builds a tokenizer for the given NASL source.
#
# code - String of NASL source to tokenize.
# path - Path the code came from, used when building error context.
def initialize(code, path)
  # One-time setup of the class-level tokenizer tables.
  initialize!

  @code = code

  # Context object shared amongst all tokens produced from this code.
  @ctx = Context.new(code, path)

  reset
end
Public Instance Methods
consume(num=1)
click to toggle source
# File lib/nasl/tokenizer.rb, line 173
# Advances the point by num characters and refreshes the cached cursor
# state (@eof, @char, @line).
def consume(num=1)
  @point += num

  # Reaching (or passing) the final character means end-of-file.
  @eof = @point >= @code.length

  # Cache the character under the point...
  @char = @code[@point]

  # ...and the remainder of its line.
  @line = @code[@point..@ctx.eol(@point)]
end
die(msg)
click to toggle source
# File lib/nasl/tokenizer.rb, line 211
# Raises a TokenException for the token currently being scanned, with a
# backtrace built from the surrounding source context.
def die(msg)
  # Default context for token errors: every full line touching the span
  # between the mark and the point.
  lines = @ctx.bol(@mark)..@ctx.eol(@point)
  backtrace = @ctx.context(@mark..(@point + 1), lines)

  raise TokenException, msg, backtrace
end
get_comment()
click to toggle source
# File lib/nasl/tokenizer.rb, line 315 def get_comment # Remember the column the comment begins in. col = @ctx.col(@point) # Consume all of the comments in the block. block = [] begin prev = @ctx.row(@point) comment = @line[/^#.*$/] break if comment.nil? block << comment consume(comment.length) skip cur = @ctx.row(@point) end while @ctx.col(@point) == col && cur == prev + 1 return [:COMMENT, block.join("\n")] end
get_comment_c_style()
click to toggle source
# File lib/nasl/tokenizer.rb, line 334
# Scans a C-style comment at the point: either a single-line "//" comment
# or a multi-line "/* ... */" comment.
#
# Returns a [:COMMENT, text] pair. Raises a TokenException via die() when
# a "/*" comment is never closed.
def get_comment_c_style
  if @code[@point + 1] == '/'
    # Single-line: // comment here
    comment = @line[/^\/\/.*$/]
  else
    # Multi-line: /* comment here */
    rest = @code[@point..-1]
    comment = rest[/^\/\*.*?\*\//m]
    die("Unterminated multiline comment") if comment.nil?
  end

  consume(comment.length)
  skip

  return [:COMMENT, comment]
end
get_identifier()
click to toggle source
# File lib/nasl/tokenizer.rb, line 221 def get_identifier # Identifiers are composed of letters, digits, and underscores. #ident = @line[/^[_a-z][_a-z0-9]*/i] # ident = @line[/^[_a-z]([_a-z0-9]*::[_a-z0-9]+)*[_a-z0-9]*/i] ident = @line[/^(::|[_a-z])([_a-z0-9]*::[_a-z0-9]+)*[_a-z0-9]*/i] consume(ident.length) # Assume that we've got an identifier until proven otherwise. type = :IDENT # Identifiers may be prefixed with keywords. One example of a valid # identifier is "break_". To ensure that we catch these cases, we # initially parse all keywords as identifiers and then convert them as # needed. type = @@keywords[ident] if @@keywords.has_key? ident return [type, ident] end
get_integer()
click to toggle source
# File lib/nasl/tokenizer.rb, line 240 def get_integer # Try and parse the integer in any of three bases. if @line =~ /^0x/i # Hex integers start with "0x". type = :INT_HEX name = "hex" regex1 = /^0x\w+/i regex2 = /^0x[a-f0-9]+/i elsif @line =~ /^0\w+/ # Octal integers start with "0". type = :INT_OCT name = "octal" regex1 = /^0\w+/ regex2 = /^0[0-7]+/ else # Anything else is a decimal integer. type = :INT_DEC name = "decimal" regex1 = /^\w*/ regex2 = /^[0-9]+/ end # First match with an overly permissive regex, and then match with the # proper regex. If the permissive and restrictive versions don't match, # then there's an error in the input. permissive = @line[regex1] restrictive = @line[regex2] if permissive.nil? || restrictive.nil? || permissive != restrictive # NASL interprets integers with a leading zero as octal if the only # contain octal digits, and considers the integers as decimal otherwise. type = :INT_DEC regex2 = /^[0-9]+/ restrictive = @line[regex2] end if permissive.nil? || restrictive.nil? || permissive != restrictive die("Invalid #{name} literal") end # If there was no problem, we use the restrictive version as the body of # our integer. integer = restrictive consume(integer.length) return [type, integer] end
get_operator()
click to toggle source
# File lib/nasl/tokenizer.rb, line 350 def get_operator line_prefixes = @@operator_lengths.map { |len| @line[0, len] } operators_that_matched = line_prefixes.map { |prefix| @@operators[prefix] } operators_that_matched.reject!(&:nil?) return nil if operators_that_matched.empty? op, type = operators_that_matched.sort { |a, b| a[2] <=> b[2] }.first consume(op.length) return [type, op] end
get_string()
click to toggle source
# File lib/nasl/tokenizer.rb, line 289
# Scans a string literal at the point.
#
# Single-quoted strings become :DATA tokens and may contain backslash
# escapes (including escaped single quotes); double-quoted strings become
# :STRING tokens and run to the next '"' with no escaping at all.
#
# Returns a [type, contents] pair with the bounding quotes stripped.
# Raises a TokenException via die() for an unterminated literal.
def get_string
  rest = @code[@point..-1]

  if @char == "'"
    # Single-quoted strings cannot have bare single quotes inside them.
    type = :DATA
    contents = rest[/\A'(\\.|[^'\\])*'/m]
    die("Unterminated single-quoted string") unless contents
  else
    # Double-quoted strings cannot contain double quotes at all.
    type = :STRING
    contents = rest[/\A"[^"]*"/m]
    die("Unterminated double-quoted string") unless contents
  end

  # Move the point past the literal.
  consume(contents.length)

  # Strip the bounding quotes before emitting the token.
  return [type, contents[1..-2]]
end
get_token()
click to toggle source
# File lib/nasl/tokenizer.rb, line 360 def get_token # If we deferred a token, emit it now. unless @deferred.nil? token = @deferred @deferred = nil return token end # Make sure we're not at the end of the file. return [false, Token.new(:EOF, "$", @point...@point, @ctx)] if @eof # Save our starting point, which to use Emacs terminology is called the # 'mark'. @mark = @point # Try to parse token at the point. token = if @char =~ /[_a-z]/i or @line =~ /^::/ get_identifier elsif @char =~ /['"]/ get_string elsif @char =~ /[0-9]/ get_integer elsif @char == '#' get_comment elsif (@char == '/') && ["/", "*"].include?(@code[@point+1]) get_comment_c_style else get_operator end # Everything in the language is enumerated by the above functions, so if # we get here without a token parsed, the input file is invalid. die("Invalid character ('#@char')") if token.nil? # Consume all whitespace after the token, and create an object with # context. skip token = [token.first, Token.new(*token, @mark...@point, @ctx)] # If a comment is the first token in a file, or is followed by certain # tokens, then it is considered significant. Such tokens will appear in # the grammar so that it can be made visible to nasldoc. if token.first == :COMMENT if @previous.nil? @previous = [:DUMMY, ""] else @previous = token token = get_token end elsif !@previous.nil? && @previous.first == :COMMENT && @@annotated.include?(token.first) @deferred = token token = @previous @previous = @deferred else @previous = token end return token end
get_tokens()
click to toggle source
# File lib/nasl/tokenizer.rb, line 420
# Tokenizes the entire input, returning an array of [type, Token] pairs
# that always ends with the EOF token.
def get_tokens
  tokens = []

  loop do
    tokens << get_token
    break if tokens.last.last.type == :EOF
  end

  return tokens
end
initialize!()
click to toggle source
# File lib/nasl/tokenizer.rb, line 144 def initialize! return if @@initialized @@operator_lengths = @@operators.map { |op, type| op.length }.uniq # Convert the operators into a form that's fast to access. tmp = {} @@operators.each_with_index do |op_and_type, index| op, type = op_and_type tmp[op] = [op, type, index] end @@operators = tmp @@initialized = true end
reset()
click to toggle source
# File lib/nasl/tokenizer.rb, line 187
# Returns the tokenizer to its initial state, ready to tokenize the code
# from the start.
#
# Returns self, to allow method chaining.
def reset
  # Forget any comment bookkeeping from a previous run; only comments
  # significant to nasldoc are emitted, which depends on the last token.
  @previous = @deferred = nil

  # Rewind to the first character and skip any leading whitespace.
  @point = 0
  consume(0)
  skip

  self
end
skip()
click to toggle source
# File lib/nasl/tokenizer.rb, line 203
# Advances the point past all whitespace at the current position (possibly
# spanning multiple lines, since consume refreshes @line each time).
def skip
  loop do
    ws = @line[/^\s+/]
    return unless ws
    consume(ws.length)
  end
end