class TwitterCldr::Shared::UnicodeRegex
Attributes
elements[R]
modifiers[R]
Public Class Methods
all_unicode()
click to toggle source
All unicode characters
# File lib/twitter_cldr/shared/unicode_regex.rb, line 21
# Returns a range set covering every code point from 0 through 0x10FFFF.
# The set is built on first use and the same object is handed back afterwards.
def all_unicode
  return @all_unicode if @all_unicode

  every_code_point = 0..0x10FFFF
  @all_unicode = TwitterCldr::Utils::RangeSet.new([every_code_point])
end
compile(str, modifiers = "", symbol_table = nil)
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 12
# Tokenizes +str+, parses the tokens (passing +symbol_table+ through in the
# parser's options hash) and wraps the parse result in a new instance.
#
# @param str the pattern text handed to the tokenizer
# @param modifiers forwarded unchanged to +new+ (defaults to "")
# @param symbol_table forwarded to the parser under the :symbol_table key
# @return whatever +new+ returns for the parsed elements and modifiers
def compile(str, modifiers = "", symbol_table = nil)
  parse_options = { symbol_table: symbol_table }
  parsed_elements = parser.parse(tokenizer.tokenize(str), parse_options)
  new(parsed_elements, modifiers)
end
invalid_regexp_chars()
click to toggle source
A few control characters (code points 2..7) and the surrogate code points (55296..57343, i.e. U+D800..U+DFFF). These don't play nicely with Ruby's regular expression engine, and I think we can safely disregard them.
# File lib/twitter_cldr/shared/unicode_regex.rb, line 30
# Returns the memoized range set of code points treated as invalid:
# 2..7 and 55296..57343.
def invalid_regexp_chars
  return @invalid_regexp_chars if @invalid_regexp_chars

  low_controls = 2..7
  surrogates = 0xD800..0xDFFF # 55296..57343
  @invalid_regexp_chars = TwitterCldr::Utils::RangeSet.new([low_controls, surrogates])
end
new(elements, modifiers = nil)
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 58
# Stores the given elements and modifiers on the instance.
#
# @param elements kept in @elements
# @param modifiers kept in @modifiers; nil when omitted
def initialize(elements, modifiers = nil)
  @elements, @modifiers = elements, modifiers
end
valid_regexp_chars()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 36
# Returns the result of subtracting invalid_regexp_chars from all_unicode,
# computed once and cached for later calls.
def valid_regexp_chars
  return @valid_regexp_chars if @valid_regexp_chars

  excluded = invalid_regexp_chars
  @valid_regexp_chars = all_unicode.subtract(excluded)
end
Private Class Methods
parser()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 46
# Returns a UnicodeRegexParser, created on first call and reused afterwards.
def parser
  return @parser if @parser

  @parser = TwitterCldr::Parsers::UnicodeRegexParser.new
end
tokenizer()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 42
# Returns a UnicodeRegexTokenizer, created on first call and reused afterwards.
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = TwitterCldr::Tokenizers::UnicodeRegexTokenizer.new
end
Public Instance Methods
to_regexp()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 63
# Builds a Regexp from to_regexp_str with the option bits from modifier_union.
# The Regexp is cached, so repeated calls return the same object.
def to_regexp
  return @regexp if @regexp

  pattern = to_regexp_str
  @regexp = Regexp.new(pattern, modifier_union)
end
to_regexp_str()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 67
# Concatenates each element's to_regexp_str into one string and caches it.
def to_regexp_str
  return @regexp_str if @regexp_str

  fragments = elements.map { |element| element.to_regexp_str }
  @regexp_str = fragments.join
end
to_s()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 71
# Returns the string forms of all elements concatenated in order.
#
# Uses map/join, the same shape as to_regexp_str, so the result is built in
# one pass instead of allocating a new intermediate string per element.
#
# @return [String] the joined element strings; empty when there are no elements
def to_s
  elements.map(&:to_s).join
end
Private Instance Methods
modifier_union()
click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 79
# Translates the modifier characters into a single Regexp options bitmask.
# 'm', 'i', 'x' and 'n' map to their Regexp constants; any other character
# contributes nothing. A nil modifiers value is treated as no modifiers.
# The computed mask is cached.
def modifier_union
  return @modifier_union if @modifier_union

  flags = (modifiers || '').each_char.map do |modifier_char|
    case modifier_char
    when 'm' then Regexp::MULTILINE
    when 'i' then Regexp::IGNORECASE
    when 'x' then Regexp::EXTENDED
    when 'n' then Regexp::NOENCODING
    else 0
    end
  end

  # OR all flags together, starting from 0 so an empty list yields 0.
  @modifier_union = flags.reduce(0, :|)
end