class Sie::Parser::Tokenizer

Public Instance Methods

tokenize() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 9
# Converts the current line into an ordered array of token objects.
#
# The line is first screened by check_for_control_characters. The scanner is
# then advanced repeatedly, trying each kind of lexeme in a fixed order:
# whitespace (skipped), entry, array opener, array closer, string. The loop
# ends once the scanner reports that the input is exhausted.
#
# Returns an Array of EntryToken, BeginArrayToken, EndArrayToken and
# StringToken instances, in the order they were encountered.
# Raises RuntimeError when no rule consumes input and the line has not ended.
def tokenize
  check_for_control_characters
  collected = []

  loop do
    # Whitespace only separates tokens; consume it and try again.
    next if whitespace?

    if (entry = find_entry)
      collected << EntryToken.new(entry)
    elsif begin_array?
      collected << BeginArrayToken.new
    elsif end_array?
      collected << EndArrayToken.new
    elsif (text = find_string)
      collected << StringToken.new(text)
    elsif end_of_string?
      break
    else
      # Nothing consumed any input and we are not at the end. This is not
      # expected to happen, but without bailing out the loop would never end.
      raise "Unhandled character in line at position #{scanner.pos}: " + scanner.string
    end
  end

  collected
end

Private Instance Methods

begin_array?() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 58
# Tries to consume the array-opening marker at the scanner's current position.
#
# The pattern is built by interpolating Sie::Parser::BEGINNING_OF_ARRAY into a
# regular expression. Returns the matched text (truthy) and advances the
# scanner on success; returns nil and leaves the scanner untouched otherwise.
def begin_array?
  opener = /#{Sie::Parser::BEGINNING_OF_ARRAY}/
  scanner.scan(opener)
end
check_for_control_characters() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 38
# Rejects lines that contain a control character.
#
# The rejected set is \x00-\x08, \x0a-\x1f and \x7f; tab (\x09) is not part
# of it. The position in the error message is 1-based: the number of
# characters preceding the first offending character, plus one.
#
# Returns nil when the line is clean; raises RuntimeError otherwise.
def check_for_control_characters
  offending = /(.*?)([\x00-\x08\x0a-\x1f\x7f])/.match(line)
  return unless offending

  # Group 1 holds everything before the first control character.
  leading = offending[1]
  raise "Unhandled character in line at position #{leading.length + 1}: " + scanner.string
end
end_array?() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 62
# Tries to consume the array-closing marker at the scanner's current position.
#
# The pattern is built by interpolating Sie::Parser::END_OF_ARRAY into a
# regular expression. Returns the matched text (truthy) and advances the
# scanner on success; returns nil and leaves the scanner untouched otherwise.
def end_array?
  closer = /#{Sie::Parser::END_OF_ARRAY}/
  scanner.scan(closer)
end
end_of_string?() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 76
# Reports whether the scanner has consumed the whole line.
#
# Returns true when no input remains, false otherwise. Does not move the
# scanner.
def end_of_string?
  !scanner.rest?
end
find_entry() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 48
# Tries to consume an entry label: a "#" immediately followed by one or more
# non-whitespace characters.
#
# Returns the label with its leading "#" removed, or nil when the scanner is
# not positioned at such a label (in which case the scanner does not move).
def find_entry
  label = scanner.scan(/#\S+/)
  label.sub(/\A#/, "") if label
end
find_quoted_string() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 80
# Tries to consume a double-quoted string at the scanner's current position.
#
# Inside the quotes, a backslash-escaped quote (\") does not terminate the
# string. The surrounding quotes are stripped from the result; escape
# sequences inside are returned as-is.
#
# Returns the string contents, or nil when no quoted string starts here.
def find_quoted_string
  quoted = scanner.scan(/"(\\"|[^"])*"/)
  return nil unless quoted

  without_opening = quoted.sub(/\A"/, "")
  without_opening.sub(/"\z/, "")
end
find_string() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 66
# Tries to consume a string value, preferring the quoted form and falling
# back to the unquoted form.
#
# Returns the value after passing it through remove_unnecessary_escapes, or
# nil when neither form matches.
def find_string
  raw = find_quoted_string || find_unquoted_string
  raw && remove_unnecessary_escapes(raw)
end
find_unquoted_string() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 90
# Tries to consume a run of one or more non-whitespace characters.
#
# Returns the consumed text, or nil when the scanner is at whitespace or at
# the end of the input.
def find_unquoted_string
  bare_word = /\S+/
  scanner.scan(bare_word)
end
remove_unnecessary_escapes(match) click to toggle source
# File lib/sie/parser/tokenizer.rb, line 94
# Drops the backslash from escaped backslashes and escaped double quotes.
#
# match - the String to unescape.
#
# A backslash followed by "\" or by a double quote is replaced with that
# following character alone. Any other backslash is left in place.
# Returns a new String; the argument is not modified.
def remove_unnecessary_escapes(match)
  match.gsub(/\\([\\"])/) { Regexp.last_match(1) }
end
scanner() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 98
# Returns the StringScanner used to walk over the line.
#
# The scanner is built from +line+ on first use and cached in @scanner, so
# every subsequent call returns the same object (and thus the same position).
def scanner
  return @scanner if @scanner

  @scanner = StringScanner.new(line)
end
whitespace?() click to toggle source
# File lib/sie/parser/tokenizer.rb, line 44
# Tries to consume a run of spaces and/or tabs at the scanner's current
# position.
#
# Returns the consumed text (truthy) on success, nil otherwise. Other
# whitespace characters, such as newlines, are not matched here.
def whitespace?
  blanks = /[ \t]+/
  scanner.scan(blanks)
end