class TwitterCldr::Tokenizers::NumberTokenizer
Constants
- INVERSE_SPECIAL_SYMBOLS_MAP
- INVERSE_SPECIAL_SYMBOLS_REGEX
- SPECIAL_SYMBOLS_MAP
- SPECIAL_SYMBOLS_REGEX
Attributes
data_reader[R]
Public Class Methods
new(data_reader)
click to toggle source
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 28
#
# Builds a tokenizer around the given data reader.
#
# @param data_reader the object stored in @data_reader and exposed through
#   the data_reader attribute; tokenize passes it on to PatternTokenizer.
def initialize(data_reader)
  @data_reader = data_reader
end
Public Instance Methods
tokenize(pattern)
click to toggle source
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 32
#
# Splits a number format pattern into tokens.
#
# Steps:
# 1. Every SPECIAL_SYMBOLS_REGEX match in the pattern is replaced by its
#    SPECIAL_SYMBOLS_MAP entry, looked up after dropping the first and last
#    character of the match (presumably the surrounding quote characters --
#    confirm against SPECIAL_SYMBOLS_REGEX).
# 2. The escaped pattern is handed to PatternTokenizer together with
#    data_reader and the memoized tokenizer.
# 3. Each token's value has the substitution reversed through
#    INVERSE_SPECIAL_SYMBOLS_REGEX / INVERSE_SPECIAL_SYMBOLS_MAP. Tokens are
#    updated in place via value=.
# 4. A leading token whose value is the empty string is dropped.
#
# @param pattern [String] the number pattern to tokenize.
# @return [Array] the tokens produced by PatternTokenizer, minus a leading
#   empty-valued token if there was one. An empty token list is returned
#   unchanged.
def tokenize(pattern)
  escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
    # match[1..-2] strips the first and last character before the lookup.
    SPECIAL_SYMBOLS_MAP[match[1..-2]]
  end

  tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

  tokens.each do |token|
    token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
      INVERSE_SPECIAL_SYMBOLS_MAP[match]
    end
  end

  # Without this guard, tokens.first is nil for an empty token list and the
  # .value call below raises NoMethodError.
  return tokens if tokens.empty?

  tokens.first.value == "" ? tokens[1..-1] : tokens
end
Private Instance Methods
tokenizer()
click to toggle source
# File lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb, line 54
#
# Returns the Tokenizer used for number patterns, building it on the first
# call and caching it in @tokenizer for subsequent calls.
#
# The Tokenizer is constructed with two recognizers (:pattern, then a
# :plaintext recognizer with an empty regex), a splitter regex with three
# capture groups anchored at end of line, and a final argument of false.
# NOTE(review): the meaning of the trailing false is defined by
# Tokenizer#initialize, which is not visible here -- confirm there.
def tokenizer
  return @tokenizer if @tokenizer

  recognizers = [
    TokenRecognizer.new(:pattern, /[0?#,\.]+/),
    TokenRecognizer.new(:plaintext, //)
  ]
  splitter = /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/

  @tokenizer = Tokenizer.new(recognizers, splitter, false)
end