class Regextest::Front::CharClass::CharClass

Attributes

candidates[R]
length[R]
offset[R]

Public Class Methods

new(value, caller_type = nil) click to toggle source

Constructor

# File lib/regextest/front/char-class.rb, line 24
# Builds a character class from one of several kinds of input.
#
# value       - an Array of candidate elements (stored as given, not copied),
#               a single TRange, a String of Unicode class names joined by
#               "|", or any other element that responds to offset/length.
# caller_type - optional tag kept in @caller_type; set_options skips the
#               ignore-case step when it equals :LEX_UNICODE_CLASS.
#
# Raises RuntimeError ("Invalid Unicode class ...") when
# Regextest::Unicode.property returns nil/false for a name in a String value.
def initialize(value, caller_type = nil)
  TstLog("CharClass: #{value}")
  # the whole sets are computed once and shared through class variables
  @@ascii_whole_set ||= get_ascii_whole_set
  @@unicode_whole_set ||= get_unicode_whole_set
  @caller_type = caller_type
  @options = nil

  # -1 is the placeholder used whenever no position is taken from the input;
  # only the fall-through branch below replaces it with real values
  @offset = -1
  @length = -1

  case value
  when Array
    @candidates = value
  when TRange
    @candidates = [value]
  when String   # class names joined by "|"
    @candidates = value.split("|").flat_map do |class_name|
      ranges = Regextest::Unicode.property(class_name) ||
        raise("Invalid Unicode class #{class_name}")
      # each entry is turned into a TRange from its first two members
      ranges.map { |range| TRange.new(range[0], range[1]) }
    end
  else
    @candidates = [value]
    @offset = value.offset
    @length = value.length
  end

  @is_reverse = false
  @whole_set = nil
  @other_char_classes = []
end

Public Instance Methods

add(value) click to toggle source

Add a letter to candidate letters

# File lib/regextest/front/char-class.rb, line 64
# Appends one element to the candidate list and recomputes @length so that it
# spans from @offset to the end of the new element (offset + length).
# Returns self.
def add(value)
  TstLog("CharClass add: #{value}")
  @candidates << value
  @length = value.offset + value.length - @offset
  self
end
add_ranges(ranges) click to toggle source

Add TRange objects

# File lib/regextest/front/char-class.rb, line 72
# Appends several TRange objects to the candidate list.
# A new array is assigned to @candidates; the previous array object is left
# untouched. Returns self.
def add_ranges(ranges)
  TstLog("CharClass add_trange: #{ranges}")
  @candidates = @candidates + ranges
  self
end
and(other_char_class) click to toggle source

Register another character set to be intersected with this one (the AND, `&&`, notation)

# File lib/regextest/front/char-class.rb, line 129
# Registers another character class for the AND (&&) notation.
# Nothing is intersected here; the class is only queued in
# @other_char_classes and combined later by and_process. Returns self.
def and(other_char_class)
  TstLog("CharClass and: #{other_char_class}")
  @other_char_classes << other_char_class
  self
end
and_process(options) click to toggle source

AND process of candidates

# File lib/regextest/front/char-class.rb, line 137
# Intersects this class with every class queued by `and`.
#
# Each queued class first receives the options through set_options, then its
# enumerated code points are intersected with the running result. The
# survivors are turned back into candidates via reconstruct_candidates, and
# the new candidate list is returned.
def and_process(options)
  common = @other_char_classes.inject(enumerate) do |remaining, other_char_class|
    other_char_class.set_options(options)
    remaining & other_char_class.enumerate
  end

  @candidates = reconstruct_candidates(common)
end
enumerate() click to toggle source

Enumerate the nominated letters

# File lib/regextest/front/char-class.rb, line 189
# Enumerates the letters nominated by this class.
#
# Returns a new Array holding the concatenation, in candidate order, of what
# each candidate's own `enumerate` returns (an empty Array when there are no
# candidates).
def enumerate
  TstLog("CharClass enumerate")
  # flat_map concatenates in one pass instead of re-copying the accumulated
  # array for every candidate
  @candidates.flat_map { |candidate| candidate.enumerate }
end
get_any_whole_set() click to toggle source

Get whole (assigned) unicode set

# File lib/regextest/front/char-class.rb, line 180
# Returns the code set that Regextest::Unicode.enumerate yields for
# "assigned", computing it on first use and caching it in the class variable
# @@any_code_whole_set.
def get_any_whole_set
  # `||=` also copes with the class variable never having been assigned,
  # which a plain read of it would not (NameError)
  @@any_code_whole_set ||= Regextest::Unicode.enumerate("assigned")
end
get_ascii_whole_set() click to toggle source

Get whole code set of ascii

# File lib/regextest/front/char-class.rb, line 164
# Returns the members common to the "ascii" and "print" sets reported by
# Regextest::Unicode.enumerate, in the order of the "ascii" set.
def get_ascii_whole_set
  %w[ascii print].map { |class_name| Regextest::Unicode.enumerate(class_name) }.inject(:&)
end
get_unicode_whole_set() click to toggle source

Get the code set of the specified Unicode classes (by environment variable)

# File lib/regextest/front/char-class.rb, line 171
# Returns the union of the code sets of every class named in
# TstConstUnicodeCharSet (names separated by "|"), without duplicates.
def get_unicode_whole_set
  TstConstUnicodeCharSet.split("|").inject([]) do |union, class_name|
    union | Regextest::Unicode.enumerate(class_name)
  end
end
get_whole_set(options) click to toggle source

Get whole code set

# File lib/regextest/front/char-class.rb, line 149
# Returns the whole code set that a negated class is subtracted from.
#
# options - Hash whose :reg_options entry answers is_unicode? and
#           is_multiline?.
#
# The Unicode set is used when is_unicode? is true, the ASCII set otherwise.
# In multiline mode the newline is added as well.
def get_whole_set(options)
  reg_options = options[:reg_options]
  whole_set = reg_options.is_unicode? ? @@unicode_whole_set : @@ascii_whole_set

  # Newline is added as a code point (0x0A), not as the String "\n": set_reverse
  # sorts this set and reconstruct_candidates does integer arithmetic on its
  # members, so a String member would make the sort fail.
  # `|` builds a new array, so the shared class-level set is left unchanged.
  whole_set |= [0x0A] if reg_options.is_multiline?
  whole_set
end
ignore_process(options) click to toggle source

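Ignore-case process: when the ignore-case option is set, add the single-letter case alternatives of the candidate letters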

# File lib/regextest/front/char-class.rb, line 195
# Ignore-case processing.
#
# Does nothing unless options[:reg_options].is_ignore? is true. Otherwise
# every enumerated letter of every candidate is passed to
# Regextest::Front::CaseFolding.ignore_case; the first entry of each non-nil
# answer is collected as an alternative. Single-letter alternatives are
# appended to the enumerated code points and the candidates are rebuilt from
# the result. Returns the new candidates, or nil when nothing was changed.
def ignore_process(options)
  return unless options[:reg_options].is_ignore?

  alternatives = []
  @candidates.each do |candidate|
    candidate.enumerate.each do |letter|
      folded = Regextest::Front::CaseFolding.ignore_case([letter])
      alternatives.push(folded[0]) if folded
    end
  end
  return if alternatives.empty?

  code_points = enumerate
  # alternatives made of more than one letter are ignored
  singles = alternatives.select { |alternative| alternative.size == 1 }
  @candidates = reconstruct_candidates(code_points + singles.map { |alternative| alternative[0] })
end
json() click to toggle source

transform to json format

# File lib/regextest/front/char-class.rb, line 245
# Serialises this class as a JSON object string of type LEX_CHAR_CLASS.
#
# Every call increments the class-wide counter @@id and uses the new value in
# the "id" field ("CC<n>"). The "value" array holds each candidate's own json,
# and "charset" comes from @options[:reg_options], so set_options must have
# been called beforehand. A class with a single candidate is still wrapped in
# a LEX_CHAR_CLASS node.
def json
  # take the id before the candidates are serialised, since their json calls
  # may advance the shared counter
  node_id = (@@id += 1)
  charset = @options[:reg_options].charset
  members = @candidates.map { |elem| elem.json }.join(",")

  fields = [
    %("type": "LEX_CHAR_CLASS"),
    %("id": "CC#{node_id}"),
    %("offset": #{@offset}),
    %("length": #{@length}),
    %("value": [#{members}]),
    %("charset": "#{charset}")
  ]
  "{#{fields.join(', ')}}"
end
reconstruct_candidates(code_points) click to toggle source

Reconstruct candidate letters

# File lib/regextest/front/char-class.rb, line 110
# Rebuilds candidates from a list of code points.
#
# code_points - Array of integer code points; it is read in the given order
#               and is NOT modified.
#
# Each run of consecutive code points (every value exactly one greater than
# the previous one) becomes a single TRange.new(first, last). Returns the
# Array of TRange objects, empty when code_points is empty.
def reconstruct_candidates(code_points)
  new_candidates = []
  range_start = range_end = nil

  code_points.each do |codepoint|
    if range_start.nil?
      # first code point opens the first run
      range_start = range_end = codepoint
    elsif codepoint == range_end + 1
      range_end = codepoint
    else
      # gap found: close the current run and open a new one
      new_candidates.push TRange.new(range_start, range_end)
      range_start = range_end = codepoint
    end
  end

  # close the last open run, if any
  new_candidates.push TRange.new(range_start, range_end) unless range_start.nil?
  new_candidates
end
reverse() click to toggle source

reverse candidate letters (valid only in a bracket)

# File lib/regextest/front/char-class.rb, line 79
# Marks this class as negated (valid only inside a bracket).
# Only the @is_reverse flag is set here; the candidates are inverted later,
# when set_options calls set_reverse. Returns true.
def reverse
  TstLog("CharClass reverse")
  @is_reverse = true
end
set_options(options) click to toggle source

fixes charset using options

# File lib/regextest/front/char-class.rb, line 217
# Fixes the character set according to the regexp options.
#
# Steps, in order:
#   1. remember the options and pass them to every candidate that responds
#      to set_options;
#   2. run and_process when classes were queued through `and`;
#   3. run ignore_process, unless this class was created with caller_type
#      :LEX_UNICODE_CLASS (a Unicode class such as \p{lower} outside a
#      bracket);
#   4. run set_reverse when the class was marked by `reverse`.
# Returns self.
def set_options(options)
  TstLog("CharClass set_options: #{options[:reg_options].inspect}")
  @options = options

  @candidates.each do |candidate|
    candidate.set_options(options) if candidate.respond_to?(:set_options)
  end

  and_process(options) unless @other_char_classes.empty?
  ignore_process(options) unless @caller_type == :LEX_UNICODE_CLASS
  set_reverse(options) if @is_reverse
  self
end
set_reverse(options, default_whole_set = nil) click to toggle source
# File lib/regextest/front/char-class.rb, line 84
# Replaces the candidates by their complement within a whole set.
#
# options           - Hash whose :reg_options entry answers is_unicode?.
# default_whole_set - whole set to subtract from; when nil the set comes from
#                     get_whole_set(options).
#
# When no default set was given and some enumerated letter is missing from
# the whole set, the method calls itself once more with a wider set
# (get_any_whole_set in Unicode mode, @@unicode_whole_set otherwise).
# Otherwise the enumerated letters are removed from the sorted whole set and
# the remainder is rebuilt through reconstruct_candidates. Returns self.
def set_reverse(options, default_whole_set = nil)
  TstLog("CharClass set_reverse")

  excluded = @candidates.flat_map { |candidate| candidate.enumerate }

  # whole set of letters (depends on the language environment)
  @whole_set = default_whole_set || get_whole_set(options)
  universe = @whole_set.sort

  # find-any binary search over the sorted set; nil means "not a member"
  needs_wider_set = !default_whole_set &&
    excluded.any? { |letter| !universe.bsearch { |member| letter <=> member } }

  if needs_wider_set
    wider_set = options[:reg_options].is_unicode? ? get_any_whole_set : @@unicode_whole_set
    set_reverse(options, wider_set)
  else
    @candidates = reconstruct_candidates(universe - excluded)
  end
  self
end