class TwitterCldr::Parsers::UnicodeRegexParser::CharacterClass

This is analogous to ICU's UnicodeSet class.

Constants

BinaryOperator

Character classes can include set operations (e.g. union, intersection, etc.).

GROUPING_PAIRS
UnaryOperator

Attributes

root[R]

Public Class Methods

closing_types() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 36
# Returns the keys of GROUPING_PAIRS. The list is computed on first call and
# cached in @closing_types for later calls.
#
# NOTE(review): the name implies these are the token types that close a
# grouping; GROUPING_PAIRS is defined outside this excerpt -- confirm.
def closing_types
  @closing_types ||= GROUPING_PAIRS.keys
end
new(root) click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 46
# Stores +root+ in @root, where it is exposed through the +root+ reader.
#
# NOTE(review): +root+ is presumably the top node of the parsed character
# class expression (an operator node or a leaf) -- confirm against the parser.
def initialize(root)
  @root = root
end
opening_type_for(type) click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 40
# Looks +type+ up in GROUPING_PAIRS and returns the associated value.
#
# NOTE(review): judging by the method names, GROUPING_PAIRS maps a closing
# token type to its opening token type; the constant is not visible here.
def opening_type_for(type)
  GROUPING_PAIRS[type]
end
opening_types() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 32
# Returns the values of GROUPING_PAIRS. The list is computed on first call
# and cached in @opening_types for later calls.
#
# NOTE(review): the name implies these are the token types that open a
# grouping; GROUPING_PAIRS is defined outside this excerpt -- confirm.
def opening_types
  @opening_types ||= GROUPING_PAIRS.values
end

Public Instance Methods

codepoints() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 62
# Collects the codepoints of every leaf in the tree under +root+ by
# delegating to #codepoints_from. Operators are not applied: operands of
# every operator node are simply gathered.
def codepoints
  codepoints_from(root)
end
negated?() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 70
# Reports whether the top-level node is a negation, i.e. its +type+ is
# :unary_operator and its +operator+ is :negate. Only the root node is
# inspected; nested negations do not count.
def negated?
  top = root
  top.type == :unary_operator && top.operator == :negate
end
to_regexp_str() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 54
# Evaluates this character class with #to_set and passes the resulting set
# to +set_to_regex+, returning whatever that helper produces.
#
# NOTE(review): +set_to_regex+ is defined outside this excerpt; presumably it
# renders the set as regular-expression source text -- confirm.
def to_regexp_str
  set_to_regex(to_set)
end
to_s() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 66
# Returns the textual form of the expression tree under +root+, as built by
# #stringify.
def to_s
  stringify(root)
end
to_set() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 58
# Evaluates the expression tree under +root+ with #evaluate, applying the
# set operators it contains, and returns the resulting set.
def to_set
  evaluate(root)
end
type() click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 50
# Identifies this node kind; always returns the symbol :character_class.
def type
  :character_class
end

Private Instance Methods

codepoints_from(node) click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 78
# Recursively gathers codepoints from the subtree rooted at +node+.
#
# Operators are ignored: a unary node contributes its child's codepoints and
# a binary node contributes its left operand's codepoints followed by its
# right operand's, joined with +. Any other node is treated as a leaf and
# asked for its own +codepoints+.
def codepoints_from(node)
  case node
  when UnaryOperator
    codepoints_from(node.child)
  when BinaryOperator
    from_left = codepoints_from(node.left)
    from_right = codepoints_from(node.right)
    from_left + from_right
  else
    node.codepoints
  end
end
evaluate(node) click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 109
# Recursively evaluates the subtree rooted at +node+ into a set.
#
# Operator nodes are dispatched on +operator+:
#   :negate        - valid_regexp_chars.subtract(child)
#   :union, :pipe  - left.union(right)
#   :dash          - left.difference(right)
#   :ampersand     - left.intersection(right)
# An operator node with any other operator yields nil.
#
# A non-operator node is converted with +to_set+; a nil/false node yields an
# empty TwitterCldr::Utils::RangeSet.
def evaluate(node)
  case node
  when UnaryOperator, BinaryOperator
    case node.operator
    when :negate
      # Receiver is fetched before the operand is evaluated, as in the
      # straightforward method-call form.
      universe = TwitterCldr::Shared::UnicodeRegex.valid_regexp_chars
      universe.subtract(evaluate(node.child))
    when :union, :pipe
      lhs = evaluate(node.left)
      lhs.union(evaluate(node.right))
    when :dash
      lhs = evaluate(node.left)
      lhs.difference(evaluate(node.right))
    when :ampersand
      lhs = evaluate(node.left)
      lhs.intersection(evaluate(node.right))
    end
  else
    node ? node.to_set : TwitterCldr::Utils::RangeSet.new([])
  end
end
stringify(node) click to toggle source
# File lib/twitter_cldr/parsers/unicode_regex/character_class.rb, line 89
# Recursively renders the subtree rooted at +node+ as text.
#
# Operators are rendered as: :negate => '^', :union / :pipe => '' (operands
# are simply juxtaposed), :dash => '-', :ampersand => '&'. An unrecognized
# operator renders as an empty string.
#
# A unary node is rendered as the operator followed by its child; a binary
# node as left operand, operator, right operand. Any other node is rendered
# with its own +to_s+.
def stringify(node)
  case node
  when UnaryOperator, BinaryOperator
    op_str = case node.operator
             when :negate then '^'
             when :union, :pipe then ''
             when :dash then '-'
             when :ampersand then '&'
             end

    case node
    when UnaryOperator
      # Unary nodes carry a single operand in #child (the same accessor the
      # other tree walkers in this class use); they are not asked for
      # #left / #right.
      "#{op_str}#{stringify(node.child)}"
    else
      left = stringify(node.left)
      right = stringify(node.right)

      "#{left}#{op_str}#{right}"
    end
  else
    node.to_s
  end
end