class LineParser::Tokenizer

Attributes

special_token_re [R]

The precompiled Regexp of registered special tokens, assigned by ::setup.

Public Class Methods

setup(special_tokens, unescaped_special_tokens=[])
# File lib/log_line_parser/line_parser.rb, line 23
def setup(special_tokens, unescaped_special_tokens=[])
  @special_tokens = special_tokens
  @unescaped_special_tokens = unescaped_special_tokens
  @scanner = StringScanner.new("".freeze)
  @special_token_re = compose_re(@special_tokens)
end
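Because ::setup and ::tokenize are class methods, the instance variables assigned here (@special_tokens, @unescaped_special_tokens, @scanner, @special_token_re) live on the class object and are reused by every later call to ::tokenize. The sketch below is a hypothetical call; the token sets are illustrative, not the ones the gem registers for real log formats. Entries in unescaped_special_tokens are appended to the alternation without Regexp.escape (see ::compose_special_tokens_str), so they may contain regexp syntax:

LineParser::Tokenizer.setup(['"', ','], ['\r?\n'])
# special_token_re is now /"|,|\r?\n/ : the '\r?\n' fragment keeps its
# regexp meaning, while '"' and ',' are matched literally.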
tokenize(str, tokens=[])
# File lib/log_line_parser/line_parser.rb, line 8
def tokenize(str, tokens=[])
  @scanner.string = str
  cur_pos = 0 # instead of @scanner.pos
  while chunk_size = @scanner.skip_until(@special_token_re)
    token = @scanner.matched
    pre_match_size = chunk_size - token.bytesize
    tokens.push str.byteslice(cur_pos, pre_match_size) if pre_match_size > 0
    tokens.push token
    cur_pos += chunk_size
  end

  tokens.push @scanner.rest unless @scanner.eos?
  tokens
end
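A minimal usage sketch combining the two public methods; the special-token set and the input line are made up for illustration and are not the configuration the gem ships with. Because results are pushed onto the tokens argument and that same array is returned, callers may also pass in their own array to accumulate tokens across calls:

LineParser::Tokenizer.setup(['"', '[', ']', ' '])
LineParser::Tokenizer.tokenize('192.168.3.4 - - [07/Feb/2016]')
# => ["192.168.3.4", " ", "-", " ", "-", " ", "[", "07/Feb/2016", "]"]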

Private Class Methods

compose_re(special_tokens)
# File lib/log_line_parser/line_parser.rb, line 39
def compose_re(special_tokens)
  tokens_str = compose_special_tokens_str(special_tokens)
  return Regexp.compile(tokens_str)
end
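For illustration, with the hypothetical tokens '"', '[' and ']' the composed source string is an alternation of their escaped forms, and compose_re simply compiles it into the Regexp that ::tokenize scans with:

Regexp.compile('"|\[|\]')
# => /"|\[|\]/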
compose_special_tokens_str(special_tokens)
# File lib/log_line_parser/line_parser.rb, line 32
def compose_special_tokens_str(special_tokens)
  sorted = special_tokens.sort {|x, y| y.length <=> x.length }
  escaped = sorted.map {|token| Regexp.escape(token) }
  escaped.concat @unescaped_special_tokens if @unescaped_special_tokens
  escaped.join('|')
end
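The descending-length sort matters because Regexp alternation is ordered: the first branch that matches wins, so a token must be listed before any other token that is its prefix. A self-contained illustration with made-up tokens:

sorted = ['<', '<='].sort { |x, y| y.length <=> x.length }
# => ["<=", "<"]
Regexp.compile(sorted.map { |token| Regexp.escape(token) }.join('|'))
# => /<=|</ : "<=" is tried first, so it is recognized as one token
#    instead of the match stopping after the shorter "<"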