class TwitterCldr::Shared::UnicodeRegex

Attributes

elements[R]
modifiers[R]

Public Class Methods

all_unicode() click to toggle source

All Unicode code points (0..0x10FFFF)

# File lib/twitter_cldr/shared/unicode_regex.rb, line 21
# Returns a RangeSet built from the single range 0..0x10FFFF.
# The set is created on first use and the same object is returned afterwards.
def all_unicode
  @all_unicode ||= begin
    every_code_point = 0..0x10FFFF
    TwitterCldr::Utils::RangeSet.new([every_code_point])
  end
end
compile(str, modifiers = "", symbol_table = nil) click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 12
# Builds a new instance from a pattern string.
#
# str          - text handed to the tokenizer.
# modifiers    - passed through unchanged as the second argument to `new`
#                (defaults to an empty string).
# symbol_table - forwarded to the parser under the :symbol_table option key
#                (defaults to nil).
#
# Returns whatever `new` returns for the parsed elements and modifiers.
def compile(str, modifiers = "", symbol_table = nil)
  regex_parser = parser
  tokens = tokenizer.tokenize(str)
  parse_options = { symbol_table: symbol_table }

  # The options are passed as a positional Hash, exactly as before.
  parsed_elements = regex_parser.parse(tokens, parse_options)
  new(parsed_elements, modifiers)
end
invalid_regexp_chars() click to toggle source

A few <control> characters (i.e. 2..7) and the public/private surrogate code points (i.e. 55296..57343, which is U+D800..U+DFFF). These don't play nicely with Ruby's regular expression engine, and I think we can safely disregard them.

# File lib/twitter_cldr/shared/unicode_regex.rb, line 30
# Returns a RangeSet holding the two code point ranges this class treats as
# unusable in a regexp: 2..7 and 55296..57343.
# The set is created on first use and the same object is returned afterwards.
def invalid_regexp_chars
  @invalid_regexp_chars ||= begin
    control_chars = 2..7
    surrogates = 0xD800..0xDFFF # 55296..57343 in decimal
    TwitterCldr::Utils::RangeSet.new([control_chars, surrogates])
  end
end
new(elements, modifiers = nil) click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 58
# Stores the given elements and modifiers on the instance.
#
# elements  - kept as-is in @elements.
# modifiers - kept as-is in @modifiers (defaults to nil).
def initialize(elements, modifiers = nil)
  @elements, @modifiers = elements, modifiers
end
valid_regexp_chars() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 36
# Returns the result of subtracting invalid_regexp_chars from all_unicode.
# The result is computed on first use and the same object is returned afterwards.
def valid_regexp_chars
  @valid_regexp_chars ||= begin
    everything = all_unicode
    everything.subtract(invalid_regexp_chars)
  end
end

Private Class Methods

parser() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 46
# Returns the shared UnicodeRegexParser, creating it on first use.
def parser
  @parser || (@parser = TwitterCldr::Parsers::UnicodeRegexParser.new)
end
tokenizer() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 42
# Returns the shared UnicodeRegexTokenizer, creating it on first use.
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = TwitterCldr::Tokenizers::UnicodeRegexTokenizer.new
end

Public Instance Methods

to_regexp() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 63
# Returns a Regexp built from to_regexp_str with the option flags from
# modifier_union. The Regexp is built once and reused on later calls.
def to_regexp
  @regexp ||= begin
    pattern = to_regexp_str
    flags = modifier_union
    Regexp.new(pattern, flags)
  end
end
to_regexp_str() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 67
# Returns the concatenation of every element's to_regexp_str, in order.
# The string is built once and reused on later calls.
def to_regexp_str
  @regexp_str ||= begin
    fragments = elements.map { |element| element.to_regexp_str }
    fragments.join
  end
end
to_s() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 71
# Returns the concatenation of every element's to_s, in order.
#
# Uses map + join (like to_regexp_str) rather than repeated String#+,
# which allocated a new, ever-growing String for each element.
def to_s
  @elements.map(&:to_s).join
end

Private Instance Methods

modifier_union() click to toggle source
# File lib/twitter_cldr/shared/unicode_regex.rb, line 79
# Folds the modifier characters into one Integer of OR-ed Regexp option flags.
#
# 'm', 'i', 'x' and 'n' map to Regexp::MULTILINE, IGNORECASE, EXTENDED and
# NOENCODING respectively; any other character contributes nothing.
# A nil modifiers value is treated like an empty string.
# The result is computed once and reused on later calls.
def modifier_union
  @modifier_union ||= begin
    flag_for = {
      'm' => Regexp::MULTILINE,
      'i' => Regexp::IGNORECASE,
      'x' => Regexp::EXTENDED,
      'n' => Regexp::NOENCODING
    }

    (modifiers || '').each_char.reduce(0) do |flags, modifier_char|
      flags | flag_for.fetch(modifier_char, 0)
    end
  end
end