class Regextest::Back::Main

Main class of back-end. Construct candidates array and narrow down

Public Class Methods

new(json_obj, max_nest, retry_count = 0) click to toggle source
# File lib/regextest/back/main.rb, line 12
def initialize(json_obj, max_nest, retry_count = 0)
  @json_obj = json_obj
  @max_nest = max_nest
  @past_max_nest = 0 # max nest of the past
  @retry_count = retry_count
  @parens_hash = {}  # hash to keep string generated by parentheses
  @nest = 0          # current nest of back-reference
  @quit_mode = false # flag for preventing from increase of nest
                     # if true, \g<foo> is restrained if possible
end

Public Instance Methods

generate() click to toggle source
# File lib/regextest/back/main.rb, line 23
def generate
  # seek parentheses because there are references defined ahead
  seek_parens(@json_obj)
  
  # generate pre-result of matched string (pre-result contains candidates of letters)
  param = {}
  pre_result = generate_candidates(@json_obj, param)
  return nil unless pre_result
  TstLog("pre_result1:\n" + pre_result.map{|elem| elem.inspect}.join("\n"))

  # narrow down the candidates
  result = narrow_down_candidates(pre_result)
  TstLog("pre_result2:\n" + result.inspect)
  return nil if !result || !result.narrow_down
  # fixes result
  result.fix
  
  result
end
generate_candidates(target, param) click to toggle source

generate pre-result of matched string (pre-result contains candidates of letters)

# File lib/regextest/back/main.rb, line 58
def generate_candidates(target, param)
  # puts "MATCH type:#{target["type"]}"
  
  result = nil
  case target["type"]
  when "LEX_SEQ"  # sequence of letters or parentheses
    result = generate_candidates_seq(target, param)
  when "LEX_SELECT"
    result = generate_candidates_select(target, param)
  when "LEX_PAREN"
    result = generate_candidates_paren(target, param)
  when "LEX_CHAR_CLASS"
    result = generate_candidates_char_class(target, param)
  when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS", "LEX_UNICODE_CLASS_BRACKET"
    result = generate_candidates(target["value"], param)
  when "LEX_REPEAT"
    result = generate_candidates_repeat(target, param)
  when "LEX_RANGE"
    result = generate_candidates_range(target, param)
  when "LEX_BACK_REFER", "LEX_NAMED_REFER"
    result = generate_candidates_back_refer(target, param)
  when "LEX_NAMED_GENERATE"
    result = generate_candidates_named_generate(target, param)
  when "LEX_CHAR"
    result = generate_candidates_char(target, param)
  when "LEX_ANC_LINE_BEGIN"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_LINE_BEGIN})
  when "LEX_ANC_LINE_END"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_LINE_END})
  when "LEX_ANC_WORD_BOUND"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_WORD_BOUND})
  when "LEX_ANC_WORD_UNBOUND"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_WORD_UNBOUND})
  when "LEX_ANC_STRING_BEGIN"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_STRING_BEGIN})
  when "LEX_ANC_STRING_END"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_STRING_END})
  when "LEX_ANC_STRING_END2"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_STRING_END2})
  when "LEX_ANC_MATCH_START"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_MATCH_START})
  when "LEX_ANC_LOOK_BEHIND2"
    result = Regextest::Back::Element.new({cmd: :CMD_ANC_LOOK_BEHIND2})
  when "LEX_OPTION_PAREN"     # options are processed at front-end
    result = []
  when "LEX_EMPTY"
    result = []
  else
    raise "#{target["type"]} not implemented (from generate_candidates routine)"
  end
  result
end
generate_candidates_back_refer(target, param) click to toggle source

back_refer

# File lib/regextest/back/main.rb, line 323
def generate_candidates_back_refer(target, param)
  if @parens_hash[target["refer_name"]][:generated]
    relative_num = -1   # default value
    if target["relative_num"] != ""
      work = @nest + target["relative_num"].to_i
      return nil if(work < 0 || !@parens_hash[target["refer_name"]][:generated][work])
      relative_num = work
    end
    # puts "relative: #{relative_num}, nest=#{@nest}, :#{target}"
    result = @parens_hash[target["refer_name"]][:generated][relative_num]
    
    # Somehow /(^a)\1/ or /(\ba)\1/ must match with "aa"
    if result.size > 0
      result = result.select{|elem| elem.command == :CMD_SELECT}
    end
  else
    result = nil
  end
  result
end
generate_candidates_char(target, param) click to toggle source

char

# File lib/regextest/back/main.rb, line 363
def generate_candidates_char(target, param)
  charset = target["charset"]
  case target["value"]
  when String
    codepoint = target["value"].unpack("U*")[0]
    result = Regextest::Back::Element.new(
      {
        cmd: :CMD_SELECT, 
        ranges: [codepoint..codepoint],
        charset: charset
      }
    )
  else
    result = generate_candidates(target["value"], param)
  end
  result
end
generate_candidates_char_class(target, param) click to toggle source

char class

# File lib/regextest/back/main.rb, line 243
def generate_candidates_char_class(target, param)
  charset = target["charset"]
  results = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [], charset: charset})
  target["value"].each do | elem |
    if sub_results = generate_candidates(elem, param)
      results.union sub_results
    end
  end
  if results.size > 0
    result = results
  else
    result = nil
  end
  result
end
generate_candidates_named_generate(target, param) click to toggle source

named generate

# File lib/regextest/back/main.rb, line 345
def generate_candidates_named_generate(target, param)
  @quit_mode = true if(@nest >= @max_nest)
  if(@quit_mode)
    result = nil
  else
    @nest += 1
    @past_max_nest = @nest if @nest > @past_max_nest
    if target["refer_name"] == "$$_0"     # recursively call whole expression
      result = generate_candidates(@json_obj, param)
    else
      result = generate_candidates(@parens_hash[target["refer_name"]][:target], param)
    end
    @nest -= 1
  end
  result
end
generate_candidates_paren(target, param) click to toggle source

parenthesis

# File lib/regextest/back/main.rb, line 190
def generate_candidates_paren(target, param)
  # analyze options of the parenthesis
  paren_prefix = target["prefix"]
  # pp target["prefix"]
  if(paren_prefix == "<=")
    lb_result = generate_candidates(target["value"], param)
    result = Regextest::Back::Element.new({cmd: :CMD_LOOK_BEHIND, result: lb_result})
  elsif(paren_prefix == "=")
    la_result = generate_candidates(target["value"], param)
    result = Regextest::Back::Element.new({cmd: :CMD_LOOK_AHEAD, result: la_result})
  elsif(paren_prefix == "<!")
    param[:negative] = :CMD_NOT_LOOK_BEHIND
    result = generate_candidates(target["value"], param)
  elsif(paren_prefix == "!")
    param[:negative] = :CMD_NOT_LOOK_AHEAD
    result = generate_candidates(target["value"], param)
  elsif(paren_prefix == ">")   # atomic group
    param[:atomic] = true
    generate_string = generate_candidates(target["value"], param)
    @parens_hash[target["refer_name"]][:generated] ||= []
    @parens_hash[target["refer_name"]][:generated][@nest] = generate_string
    result = generate_string
  elsif(paren_prefix == "")   # simple parenthesis
    generate_string = generate_candidates(target["value"], param)
    @parens_hash[target["refer_name"]][:generated] ||= []
    @parens_hash[target["refer_name"]][:generated][@nest] = generate_string
    result = generate_string
  else
    # when condition is specified
    select_num = nil
    if(target["condition_name"] && target["condition_name"].length > 0)
      if @parens_hash[target["condition_name"]][:generated]
        select_num = 0
      else
        select_num = 1
      end
    end
    
    if(select_num == 1 && target["value"]["type"] != "LEX_SELECT")
      result = nil
    else
      param[:forced_select] = select_num
      generate_string = generate_candidates(target["value"], param)
      
      @parens_hash[target["refer_name"]][:generated] ||= []
      @parens_hash[target["refer_name"]][:generated][@nest] = generate_string
      result = generate_string
    end
  end
  result
end
generate_candidates_range(target, param) click to toggle source

range

# File lib/regextest/back/main.rb, line 315
def generate_candidates_range(target, param)
  charset = target["charset"]
  letter = []
  codepoints = (target["begin"]..target["end"])
  result = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [codepoints], charset: charset})
end
generate_candidates_repeat(target, param) click to toggle source

repeat

# File lib/regextest/back/main.rb, line 260
def generate_candidates_repeat(target, param)
  max_repeat = target["max_repeat"]
  min_repeat = target["min_repeat"]
  
  # reduce repeat count if retry and there are one or more \g<foo> calls
  if @retry_count > 0 && @past_max_nest > 0
    @retry_count.times{ max_repeat = (max_repeat + 1)/2 }
  end
 
  if(@quit_mode)
    repeat = min_repeat
  elsif(max_repeat > min_repeat)
    repeat = min_repeat + TstRand(max_repeat - min_repeat + 1)
  else
    repeat = min_repeat
  end
  
  result = []
  if target["repeat_option"].index("reluctant")
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_BEGIN, id: target["id"]})
  end
  
  # puts "repeat=#{repeat} quit=#{@quit_mode} nest=#{@nest}"
  repeat.times do | current_repeat |
    if( elem = generate_candidates(target["value"], param))
      result.push elem
    else
      result = nil
      break
    end
    
    elem.flatten! if Array === elem  # flatten considering duplicated repeat
    
    # quit to repeat if the first element is begin anchor
    if current_repeat >= min_repeat &&
       elem.size > 0 &&
       elem[0].respond_to?(:command) &&
       elem[-1].respond_to?(:command)
      break if elem[0].command == :CMD_ANC_LINE_BEGIN && !elem[-1].new_line?
      break if elem[0].command == :CMD_ANC_STRING_BEGIN
    end
  end
  
  if target["repeat_option"].index("reluctant")
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_END, id: target["id"]})
  end
  
  if target["repeat_option"].index("possessive")
    la_result = [ generate_candidates(target["value"], param) ].flatten
    result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
  end
  result
end
generate_candidates_select(target, param) click to toggle source

selection of sequence. such as (aa|b|c)

# File lib/regextest/back/main.rb, line 134
def generate_candidates_select(target, param)
  if param[:forced_select]
    # index is specified by condition
    offset = param[:forced_select]
    param.delete :forced_select
    if target["value"][offset]
      result = generate_candidates(target["value"][offset], param)
    else
      # regexp such as /^(?:b|(a))(?(1)1)$/ match "b"!
      result = []
    end
  else
    # success if there is at least one result
    offsets = (0 ... target["value"].size).to_a
    
    # shuffle if element size more than 1
    offsets = TstShuffle(offsets)  if offsets.size > 1

    result = nil
    if param[:atomic]
      param.delete :atomic
      # if atomic, assure proceeding results not appeared
      offsets.each do | offset |
        result = []
        (0...offset).each do | prev |
          la_result = generate_candidates(target["value"][prev], param)
          result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
        end
        result.push generate_candidates(target["value"][offset], param)
        break if(!result.find{|elem| !elem })
      end
    elsif negative_type = param[:negative]
      # if negative, assure all results not appeared
      result = []
      offsets.each do | offset |
        la_result = generate_candidates(target["value"][offset], param)
        la_result.each do | elem |
          if elem.command == :CMD_NOT_LOOK_AHEAD
            result.push elem
          else
            result.push Regextest::Back::Element.new({cmd: negative_type, result: la_result})
          end
        end
      end
      param.delete :negative
    else
      offsets.each do | offset |
        result = generate_candidates(target["value"][offset], param)
        break if(result)
      end
    end
  end
  result
end
generate_candidates_seq(target, param) click to toggle source

sequence of letters or parentheses

# File lib/regextest/back/main.rb, line 112
def generate_candidates_seq(target, param)
  results = []
  target["value"].each do |elem|
    generated_string = generate_candidates(elem, param)
    if(Array === generated_string)
      generated_string.flatten!(1)
      results += generated_string
    else
      results.push generated_string
    end
  end
  # nil if one element failed
  if(results.index(nil))
    result = nil
  else
    # result = results.join("")
    result = results
  end
  result
end
narrow_down_candidates(candidate_array) click to toggle source

narrow down candidates considering anchors

# File lib/regextest/back/main.rb, line 382
def narrow_down_candidates(candidate_array)
  # pp candidate_array
  results = Regextest::Back::Result.new
  candidate_array.each do | elem |
    command = elem.command
    case command
    when :CMD_SELECT
      results.push_body elem
    when :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_AHEAD
      if(sub_results = narrow_down_candidates(elem.param[:result]))
        results.add_look_ahead(command, sub_results)
      else
        return nil
      end
    when :CMD_LOOK_BEHIND, :CMD_NOT_LOOK_BEHIND
      if(sub_results = narrow_down_candidates(elem.param[:result]))
        results.add_look_behind(command, sub_results)
      else
        return nil
      end
    when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
         :CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
         :CMD_ANC_LOOK_BEHIND2
      results.add_anchor(command)
    when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END,
         :CMD_ANC_POSSESSIVE_BEGIN, :CMD_ANC_POSSESSIVE_END
      results.add_reluctant_repeat(elem)
    else
      raise "inner error, invalid command at checking anchors: #{command}"
    end
  end
  if !results.merge
    return nil
  end
  results
end
seek_parens(target) click to toggle source

seek parentheses

# File lib/regextest/back/main.rb, line 44
def seek_parens(target)
  if(target["type"] == "LEX_PAREN")
    @parens_hash[target["refer_name"]] = {:target => target}
  end
  if(target["value"])
    if( Array === target["value"])
      target["value"].each{|child| seek_parens(child)}
    else
      seek_parens(target["value"])
    end
  end
end