class TwitterCldr::Tokenizers::NumberTokenizer

Constants

INVERSE_SPECIAL_SYMBOLS_MAP
INVERSE_SPECIAL_SYMBOLS_REGEX
SPECIAL_SYMBOLS_MAP
SPECIAL_SYMBOLS_REGEX

Attributes

data_reader[R]

Public Class Methods

new(data_reader)
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 28
# Builds a number tokenizer bound to the given data reader.
#
# data_reader - stored as-is in @data_reader; no validation is performed
#               here. NOTE(review): the concrete type expected is not
#               visible from this method — confirm against callers.
def initialize(data_reader)
  @data_reader = data_reader
end

Public Instance Methods

tokenize(pattern)
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 32
# Splits a number pattern into tokens.
#
# Steps:
#   1. Every match of SPECIAL_SYMBOLS_REGEX in +pattern+ is replaced by the
#      SPECIAL_SYMBOLS_MAP entry keyed by the match with its first and last
#      characters removed (presumably the surrounding quote characters —
#      confirm against the definition of SPECIAL_SYMBOLS_REGEX).
#   2. The escaped pattern is handed to a PatternTokenizer built from
#      data_reader and the memoized tokenizer.
#   3. Each token's value has the placeholders matched by
#      INVERSE_SPECIAL_SYMBOLS_REGEX mapped back through
#      INVERSE_SPECIAL_SYMBOLS_MAP. Tokens are mutated in place.
#   4. A leading token whose value is the empty string is dropped.
#
# pattern - the String pattern to tokenize.
#
# Returns the Array of tokens produced by PatternTokenizer. If that
# tokenizer yields no tokens at all, the empty list is returned as-is
# instead of raising NoMethodError on +nil+.
def tokenize(pattern)
  escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
    # Drop the first and last character of the match before the lookup.
    SPECIAL_SYMBOLS_MAP[match[1..-2]]
  end

  tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

  tokens.each do |token|
    token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
      INVERSE_SPECIAL_SYMBOLS_MAP[match]
    end
  end

  # Guard against an empty token list: tokens.first is nil in that case.
  leading = tokens.first

  if leading && leading.value == ""
    tokens[1..-1]
  else
    tokens
  end
end

Private Instance Methods

tokenizer()
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 54
# Lazily builds and caches (in @tokenizer) the Tokenizer used by #tokenize.
#
# The tokenizer is given two recognizers, in this order:
#   :pattern   - runs of the characters 0 ? # , .
#   :plaintext - an empty regex, i.e. matches anything
# followed by a splitter regex and a final +false+ argument.
# NOTE(review): the meaning of the trailing +false+ is defined by
# Tokenizer#initialize, which is not visible here — confirm there.
#
# Returns the memoized Tokenizer instance.
def tokenizer
  @tokenizer ||= begin
    recognizers = [
      TokenRecognizer.new(:pattern, /[0?#,\.]+/),
      TokenRecognizer.new(:plaintext, //),
    ]
    splitter = /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/

    Tokenizer.new(recognizers, splitter, false)
  end
end