class PuppetDBQuery::Tokenizer

Tokenizes PuppetDB queries. FIXME: distinguish between language tokens and other tokens. rubocop:disable Metrics/ClassLength

Constants

DOUBLE_CHAR_TO_TOKEN
LANGUAGE_STRINGS
LANGUAGE_TOKENS
SINGLE_CHAR_TO_TOKEN
STRING_TO_TOKEN

Attributes

position[R]
text[R]

Public Class Methods

idem(query) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 61
# Tokenizes +query+ and renders the resulting tokens back into a query
# string, i.e. passes the text through .symbols and then .query.
def self.idem(query)
  tokens = symbols(query)
  query(tokens)
end
new(text) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 68
# Creates a tokenizer for +text+; reading starts at the first character
# (position 0).
def initialize(text)
  @position = 0
  @text = text
end
query(symbols) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 53
# Builds a query string from a list of tokens: every token is converted with
# .symbol_to_string and the pieces are joined with single spaces.
def self.query(symbols)
  parts = symbols.map do |token|
    symbol_to_string(token)
  end
  parts.join(" ")
end
symbol_to_string(s) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 57
# Converts one token into its textual form.
#
# Lookup order: the LANGUAGE_STRINGS entry for the token if there is one,
# otherwise the plain name for a Symbol, otherwise the +inspect+
# representation of the value (so strings come out quoted).
def self.symbol_to_string(s)
  language_string = LANGUAGE_STRINGS[s]
  return language_string.to_s if language_string
  return s.to_s if s.is_a?(Symbol)

  s.inspect.to_s
end
symbols(query) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 46
# Tokenizes +query+ and returns all of its tokens as an Array.
#
# Trailing whitespace does not produce an extra entry: when only whitespace
# is left, the tokenizer is not yet empty but next_token returns nil, and
# that nil is dropped here instead of being appended to the result.
def self.symbols(query)
  tokenizer = Tokenizer.new(query)
  r = []
  until tokenizer.empty?
    token = tokenizer.next_token
    # compare against nil explicitly: +false+ is a legitimate token
    r << token unless token.nil?
  end
  r
end

Public Instance Methods

each() { |next_token until empty?| ... } click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 83
# Yields every remaining token to the given block, in order.
#
# When only whitespace is left, next_token returns nil although the
# tokenizer was not empty before the call; that nil is not yielded.
# (This assumes nil is never a real token, which holds as long as the token
# tables contain no nil values.)
#
# Without a block an Enumerator over the remaining tokens is returned.
def each
  return enum_for(:each) unless block_given?

  until empty?
    token = next_token
    # compare against nil explicitly: +false+ is a legitimate token
    yield token unless token.nil?
  end
end
empty?() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 79
# True once the read position has reached (or passed) the end of the text.
def empty?
  remaining = text.size - position
  remaining <= 0
end
next_token() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 73
# Skips leading whitespace and reads the next token.
# Returns nil when nothing is left after the whitespace.
def next_token
  skip_whitespace
  read_token unless empty?
end

Private Instance Methods

error(message) click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 194
# Aborts tokenizing with a RuntimeError.
#
# The message contains +message+, the complete query text and, on the line
# below it, a "^" indented by the current position.
def error(message)
  marker = "#{' ' * position}^"
  raise RuntimeError, "tokenizing query failed\n#{message}\n\n#{text}\n#{marker}"
end
increase() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 188
# Moves the read position forward by one character and returns the new
# position.
def increase
  @position = @position + 1
end
read_number() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 168
# Reads a numeric literal starting at the current position.
#
# Consumes the longest run of characters out of "-", ".", "E" and the decimal
# digits and converts it: a decimal integer literal becomes an Integer,
# anything else is tried as a Float.
#
# Returns the Integer or Float value.
# Reports the consumed text through #error when it is not a valid number
# (e.g. "1-2"), so that malformed numbers fail like every other tokenizing
# problem instead of leaking a bare ArgumentError.
def read_number
  logger.debug "  read number"
  skip_whitespace
  start = position
  increase while !empty? && text[position] =~ /[-0-9\.E]/
  r = text[start...position]
  logger.debug "  resulting number: '#{r}'"
  begin
    # base 10 is forced so that a leading zero is not read as octal
    Integer(r, 10)
  rescue ArgumentError
    # only conversion failures are handled here; any other error propagates
    begin
      Float(r)
    rescue ArgumentError
      error("invalid number: '#{r}'")
    end
  end
end
read_quoted() click to toggle source

rubocop:disable Metrics/PerceivedComplexity,Style/ConditionalAssignment

# File lib/puppetdb_query/tokenizer.rb, line 119
# Reads a quoted string literal starting at the current position.
#
# The character at the current position is taken as the quote character; the
# literal runs up to the next unescaped occurrence of that character.
# Backslash escapes: \r and \n become carriage return and newline; \', \" and
# \\ become the bare character; any other escaped character is kept together
# with its backslash (so regular expression escapes such as \. survive).
#
# Returns the unescaped content without the surrounding quotes.
# Reports a missing closing quote through #error.
def read_quoted
  logger.debug "  read quoted"
  skip_whitespace
  q = text[position] # quote character
  increase
  r = ""
  c = nil
  while !empty? && (c = text[position]) != q
    if c == "\\"
      increase
      # a backslash at the very end of the text is kept as it is
      c = text[position] unless empty?
      c =
        case c
        when 'r' then "\r"
        when 'n' then "\n"
        when "'", '"', "\\" then c
        else "\\" + c
        end
    end
    r << c
    increase
  end
  # The loop ends either on the closing quote or at the end of the text.
  # Checking the position (rather than the last character read) also catches
  # an unterminated literal whose last character is an escaped quote.
  error("I expected #{q} but I got #{c}") if empty?
  increase
  logger.debug "  resulting string: '#{r}'"
  r
end
read_symbol() click to toggle source

rubocop:enable Metrics/PerceivedComplexity,Style/ConditionalAssignment

# File lib/puppetdb_query/tokenizer.rb, line 152
# Reads a word made of letters, digits, "-" and "_" starting at the current
# position.
#
# Returns the STRING_TO_TOKEN entry for the word if there is one, the
# booleans true / false for the words "true" / "false", and otherwise the
# word itself as a Symbol.
def read_symbol
  logger.debug "  read symbol"
  skip_whitespace
  word = ""
  until empty?
    char = text[position]
    break unless char =~ /[-a-zA-Z_0-9]/
    word << char
    increase
  end
  token = STRING_TO_TOKEN[word] || word.to_sym
  # the words true and false stand for the boolean values
  if token == :true
    token = true
  elsif token == :false
    token = false
  end
  logger.debug "  resulting symbol: #{token}"
  token
end
read_token() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 89
# Reads the token that starts at the current position (after skipping
# whitespace) and returns it; returns nil at the end of the text.
#
# Two-character operators from DOUBLE_CHAR_TO_TOKEN are tried first, then
# single characters from SINGLE_CHAR_TO_TOKEN. Otherwise the first character
# decides: a letter starts a symbol, a quote starts a quoted string, a digit
# or "-" starts a number. Anything else is reported through #error.
def read_token
  logger.debug "read token"
  skip_whitespace
  return nil if empty?

  pair = text[position, 2]
  if DOUBLE_CHAR_TO_TOKEN.include?(pair)
    2.times { increase }
    token = DOUBLE_CHAR_TO_TOKEN[pair]
    logger.debug "  resulting symbol: #{token}"
    return token
  end

  char = text[position]
  if SINGLE_CHAR_TO_TOKEN.include?(char)
    increase
    token = SINGLE_CHAR_TO_TOKEN[char]
    logger.debug "  resulting symbol: #{token}"
    return token
  end

  case char
  when /[a-zA-Z]/ then read_symbol
  when "'", '"' then read_quoted
  when /[-0-9]/ then read_number
  else error("unknown kind of token: '#{char}'")
  end
end
skip_whitespace() click to toggle source
# File lib/puppetdb_query/tokenizer.rb, line 182
# Advances the read position past any whitespace at the current position.
# Stops at the first non-whitespace character or at the end of the text.
def skip_whitespace
  increase while !empty? && text[position] =~ /\s/
end