class Regextest::Front::CharClass::CharClass
Attributes
candidates[R]
length[R]
offset[R]
Public Class Methods
new(value, caller_type = nil)
click to toggle source
Constructor
# File lib/regextest/front/char-class.rb, line 24
# Build a character class.
#
# +value+ selects how the candidate list is built:
# * Array  - used as the candidate list as-is
# * TRange - wrapped into a one-element candidate list
# * String - Unicode class names joined by "|"; every name is looked up with
#   Regextest::Unicode.property and each returned pair becomes a TRange
# * anything else - wrapped into a one-element list; its offset/length are kept
#
# For the first three kinds offset and length are set to -1.
# +caller_type+ is stored and consulted later by set_options.
# Raises RuntimeError when a Unicode class name is not known.
def initialize(value, caller_type = nil)
  TstLog("CharClass: #{value}")
  @@ascii_whole_set ||= get_ascii_whole_set
  @@unicode_whole_set ||= get_unicode_whole_set
  @caller_type = caller_type
  @options = nil

  @candidates, @offset, @length =
    case value
    when Array
      # offset/length could be derived from the first/last element; -1 is used instead
      [value, -1, -1]
    when TRange
      [[value], -1, -1]
    when String
      ranges = value.split("|").flat_map do |class_name|
        pairs = Regextest::Unicode.property(class_name) || raise("Invalid Unicode class #{class_name}")
        pairs.map { |pair| TRange.new(pair[0], pair[1]) }
      end
      [ranges, -1, -1]
    else
      [[value], value.offset, value.length]
    end

  @is_reverse = false
  @whole_set = nil
  @other_char_classes = []
end
Public Instance Methods
add(value)
click to toggle source
Add a letter to candidate letters
# File lib/regextest/front/char-class.rb, line 64
# Append +value+ to the candidate list and recompute the length so that it
# reaches from this class's offset to the end of +value+.
# Returns self so calls can be chained.
def add(value)
  TstLog("CharClass add: #{value}")
  @candidates << value
  value_start = value.offset
  @length = value_start - @offset + value.length
  self
end
add_ranges(ranges)
click to toggle source
Add TRange objects
# File lib/regextest/front/char-class.rb, line 72
# Append every element of +ranges+ to the candidate list.
# A new array is built rather than extending the current one in place.
# Returns self so calls can be chained.
def add_ranges(ranges)
  TstLog("CharClass add_trange: #{ranges}")
  @candidates = @candidates + ranges
  self
end
and(other_char_class)
click to toggle source
Set another char-set to intersect with (AND (&&) notation)
# File lib/regextest/front/char-class.rb, line 129
# Remember +other_char_class+ as an operand of an AND (&&) expression.
# The intersection itself is computed later by and_process.
# Returns self so calls can be chained.
def and(other_char_class)
  TstLog("CharClass and: #{other_char_class}")
  @other_char_classes << other_char_class
  self
end
and_process(options)
click to toggle source
AND process of candidates
# File lib/regextest/front/char-class.rb, line 137
# Intersect this class's code points with those of every char class that was
# registered through #and, then rebuild the candidates from what is left.
# +options+ is forwarded to each registered class before it is enumerated.
def and_process(options)
  shared_points = @other_char_classes.inject(enumerate) do |remaining, other|
    other.set_options(options)
    remaining & other.enumerate
  end

  # turn the surviving code points back into TRange objects
  @candidates = reconstruct_candidates(shared_points)
end
enumerate()
click to toggle source
Enumerate nominated letters
# File lib/regextest/front/char-class.rb, line 189
# Collect the results of #enumerate from every candidate into one flat array,
# in candidate order. Returns an empty array when there are no candidates.
# A fresh array is returned; the candidates' own arrays are not modified.
def enumerate
  TstLog("CharClass enumerate")
  # flat_map concatenates in a single pass instead of reallocating the
  # accumulator for every candidate
  @candidates.flat_map { |candidate| candidate.enumerate }
end
get_any_whole_set()
click to toggle source
Get whole (assigned) unicode set
# File lib/regextest/front/char-class.rb, line 180
# Return the result of Regextest::Unicode.enumerate("assigned"), computing it
# on first use and caching it in a class variable afterwards.
def get_any_whole_set
  # ||= also copes with the class variable not having been set yet
  @@any_code_whole_set ||= Regextest::Unicode.enumerate("assigned")
end
get_ascii_whole_set()
click to toggle source
Get whole code set of ascii
# File lib/regextest/front/char-class.rb, line 164
# Return the members of the "ascii" set that are also in the "print" set,
# both obtained from Regextest::Unicode.enumerate.
def get_ascii_whole_set
  Regextest::Unicode.enumerate("ascii") & Regextest::Unicode.enumerate("print")
end
get_unicode_whole_set()
click to toggle source
Get code set of specified unicode classes (by environment variable)
# File lib/regextest/front/char-class.rb, line 171
# Return the union of the sets of every class named in TstConstUnicodeCharSet
# (names separated by "|"), each obtained from Regextest::Unicode.enumerate.
def get_unicode_whole_set
  TstConstUnicodeCharSet.split("|").inject([]) do |union, class_name|
    union | Regextest::Unicode.enumerate(class_name)
  end
end
get_whole_set(options)
click to toggle source
Get whole code set
# File lib/regextest/front/char-class.rb, line 149
# Return the set of all code points for the given options.
#
# options[:reg_options] picks the cached Unicode set when is_unicode? is true
# and the cached ASCII set otherwise. When is_multiline? is true, newline is
# added to the result; the cached sets themselves are left untouched.
def get_whole_set(options)
  reg_options = options[:reg_options]
  whole_set = reg_options.is_unicode? ? @@unicode_whole_set : @@ascii_whole_set

  # Add newline as a code point, not as a String: set_reverse sorts this set
  # and reconstruct_candidates does integer arithmetic on its members, so a
  # String element would break both.
  # NOTE(review): assumes the cached sets hold Integer code points - confirm
  # against Regextest::Unicode.enumerate.
  whole_set |= ["\n".ord] if reg_options.is_multiline?
  whole_set
end
ignore_process(options)
click to toggle source
ignore process
# File lib/regextest/front/char-class.rb, line 195
# Extend the candidates with case-folding alternatives.
#
# Does nothing unless options[:reg_options].is_ignore? is true. Every letter
# of every candidate is passed to Regextest::Front::CaseFolding.ignore_case;
# the first entry of each non-nil result is collected. Collected entries of
# size 1 contribute their first element to the code points, and the
# candidates are then rebuilt. Entries of any other size are skipped.
def ignore_process(options)
  return unless options[:reg_options].is_ignore?

  alternatives = []
  @candidates.each do |candidate|
    candidate.enumerate.each do |letter|
      folded = Regextest::Front::CaseFolding.ignore_case([letter])
      alternatives.push folded[0] if folded
    end
  end
  return if alternatives.empty?

  code_points = enumerate
  alternatives.each do |alternative|
    # alternatives that are not exactly one letter long are ignored
    code_points.push(alternative[0]) if alternative.size == 1
  end
  @candidates = reconstruct_candidates(code_points)
end
json()
click to toggle source
transform to json format
# File lib/regextest/front/char-class.rb, line 245
# Serialize this char class as a JSON object string of type LEX_CHAR_CLASS.
# Each call increments the shared id counter and uses the new value in the
# "id" field ("CC<n>"). "value" holds the JSON of every candidate, joined by
# commas; "charset" comes from @options[:reg_options].
def json
  # A disabled variant of this method returned @candidates[0].json directly
  # when there was only a single candidate.
  @@id += 1
  charset = @options[:reg_options].charset

  # Read the id before serializing the candidates, in case serializing one of
  # them changes the counter.
  element_id = @@id
  header = "\"type\": \"LEX_CHAR_CLASS\", \"id\": \"CC#{element_id}\""
  position = "\"offset\": #{@offset}, \"length\": #{@length}"
  members = @candidates.map { |candidate| candidate.json }.join(",")

  "{#{header}, #{position}, \"value\": [#{members}], \"charset\": \"#{charset}\"}"
end
reconstruct_candidates(code_points)
click to toggle source
Reconstruct candidate letters
# File lib/regextest/front/char-class.rb, line 110
# Compress a list of code points into TRange objects.
#
# Each run of consecutive code points (every value exactly one greater than
# the previous one) becomes a single TRange.new(first, last); an isolated
# code point becomes TRange.new(cp, cp). Runs are detected in the order
# given, so the input is expected to be ascending for maximal merging.
#
# +code_points+ is only read; the caller's array is left intact.
# Returns an array of TRange objects, empty when +code_points+ is empty.
def reconstruct_candidates(code_points)
  runs = []
  code_points.each do |code_point|
    current = runs.last
    if current && code_point == current[1] + 1
      current[1] = code_point        # extend the run in progress
    else
      runs.push [code_point, code_point]  # start a new run
    end
  end
  runs.map { |run_start, run_end| TRange.new(run_start, run_end) }
end
reverse()
click to toggle source
reverse candidate letters (valid only in a bracket)
# File lib/regextest/front/char-class.rb, line 79
# Mark this char class as reversed. Only the flag is set here; the candidates
# are actually inverted later, when set_options sees the flag.
# Returns true.
def reverse
  TstLog("CharClass reverse")
  @is_reverse = true
end
set_options(options)
click to toggle source
fixes charset using options
# File lib/regextest/front/char-class.rb, line 217
# Store +options+ and finalize the candidate set accordingly.
#
# Steps, in order:
# 1. forward the options to every candidate that responds to set_options
# 2. run the AND process when other char classes were registered
# 3. run the ignore-case process, except when this class was created with
#    caller_type :LEX_UNICODE_CLASS
# 4. invert the set when #reverse was called
#
# Returns self.
def set_options(options)
  TstLog("CharClass set_options: #{options[:reg_options].inspect}")
  @options = options

  # nested brackets need the options as well
  @candidates.each do |candidate|
    candidate.set_options(options) if candidate.respond_to?(:set_options)
  end

  and_process(options) unless @other_char_classes.empty?

  # the ignore process must be skipped for a unicode class (\p{lower} etc.)
  # that appears outside of a bracket
  ignore_process(options) unless @caller_type == :LEX_UNICODE_CLASS

  set_reverse(options) if @is_reverse

  self
end
set_reverse(options, default_whole_set = nil)
click to toggle source
# File lib/regextest/front/char-class.rb, line 84
# Replace the candidates with their complement relative to a whole set.
#
# The whole set is +default_whole_set+ when given, otherwise the result of
# get_whole_set(options). If no default was given and some enumerated element
# is not found in that whole set, the method calls itself once more with a
# wider set: get_any_whole_set in unicode mode, the cached unicode set
# otherwise. Returns self.
def set_reverse(options, default_whole_set = nil)
  TstLog("CharClass set_reverse")
  excluded = @candidates.flat_map { |candidate| candidate.enumerate }

  # the whole set of letters depends on the language environment
  @whole_set = default_whole_set || get_whole_set(options)
  universe = @whole_set.sort

  outside_universe = !default_whole_set &&
                     excluded.any? { |code_point| !universe.bsearch { |member| code_point <=> member } }

  if outside_universe
    wider_set = if options[:reg_options].is_unicode?
                  get_any_whole_set
                else
                  # ascii or default mode
                  @@unicode_whole_set
                end
    set_reverse(options, wider_set)
  else
    # rebuild the valid character set as TRange objects
    @candidates = reconstruct_candidates(universe - excluded)
  end

  self
end