class Regextest::Back::Result

Attributes

end_offset[R]
match[R]
positional_anchors[R]
post_match[R]
pre_match[R]
results[R]

Public Class Methods

new() click to toggle source
# File lib/regextest/back/result.rb, line 11
# Builds an empty result: no elements, no pending look-ahead / look-behind
# sub-results, no recorded anchors or repeat markers, both offsets at zero and
# no fixed pre-match / match / post-match strings yet.
def initialize
  # element list plus the sub-results still waiting to be merged into it
  @results, @look_aheads, @look_behinds = [], [], []
  # anchor command => offsets, repeat id => offsets
  @positional_anchors, @reluctant_repeat, @possessive_repeat = {}, {}, {}
  @start_offset = @end_offset = 0
  @pre_match = @match = @post_match = nil
end

Public Instance Methods

[](offset) click to toggle source

Returns the element stored at the given offset

# File lib/regextest/back/result.rb, line 37
# Looks up whatever the element list holds at +offset+.
def [](offset)
  @results.slice(offset)
end
add_anchor(cmd) click to toggle source

Adds offset of anchor

# File lib/regextest/back/result.rb, line 57
# Records the current end offset under anchor command +cmd+, creating the
# offset list for that command on first use. Returns that offset list.
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []) << @end_offset
end
add_look_ahead(command, sub_results) click to toggle source

Adds results of look_ahead

# File lib/regextest/back/result.rb, line 47
# Queues a look-ahead for later merging, remembering the current end offset,
# the look-ahead command and its sub-results. Returns the queue.
def add_look_ahead(command, sub_results)
  pending = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads << pending
end
add_look_behind(command, sub_results) click to toggle source

Adds results of look_behind

# File lib/regextest/back/result.rb, line 52
# Queues a look-behind for later merging, remembering the current end offset,
# the look-behind command and its sub-results. Returns the queue.
def add_look_behind(command, sub_results)
  pending = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds << pending
end
add_reluctant_repeat(elem) click to toggle source

Adds reluctant / possessive repeat information

# File lib/regextest/back/result.rb, line 63
# Records where a reluctant or possessive repeat begins / ends.
#
# A BEGIN command starts a fresh offset list for the repeat id taken from
# elem.param[:id]; an END command appends the current end offset to it.
# Raises RuntimeError when an END arrives without a matching BEGIN, or when
# the command is none of the four repeat markers.
def add_reluctant_repeat(elem)
  id = elem.param[:id]
  case elem.command
  when :CMD_ANC_RELUCTANT_BEGIN
    @reluctant_repeat[id] = [@end_offset]
  when :CMD_ANC_RELUCTANT_END
    span = @reluctant_repeat[id]
    raise "internal error, invalid reluctant_repeat_end command" unless span
    span.push @end_offset
  when :CMD_ANC_POSSESSIVE_BEGIN
    @possessive_repeat[id] = [@end_offset]
  when :CMD_ANC_POSSESSIVE_END
    span = @possessive_repeat[id]
    raise "internal error, invalid possessive_repeat_end command" unless span
    span.push @end_offset
  else
    raise "internal error, invalid reluctant / possessive repeat command"
  end
end
bound_process(elem1, elem2) click to toggle source

Word-boundary process (\b)

# File lib/regextest/back/result.rb, line 452
# Forces the two neighbouring elements onto opposite sides of a word
# boundary: whichever element is already word / non-word decides the other
# one (elem1 is consulted first). When neither is decided, TstRand(2) picks
# which of the two becomes the word side.
#
# Returns false when either element reports empty? afterwards, true otherwise.
def bound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_non_word_elements
  elsif elem1.non_word_elements?
    elem2.set_word_elements
  elsif elem2.word_elements?
    elem1.set_non_word_elements
  elsif elem2.non_word_elements?
    elem1.set_word_elements
  elsif TstRand(2) == 0
    elem1.set_word_elements
    elem2.set_non_word_elements
  else
    elem1.set_non_word_elements
    elem2.set_word_elements
  end
  !elem1.empty? && !elem2.empty?
end
fix() click to toggle source

Fixes results

# File lib/regextest/back/result.rb, line 520
# Fixes the three slices of the element list through fix_part: everything
# before @start_offset, the span from @start_offset up to (not including)
# @end_offset, and everything from @end_offset on. The slices are stored in
# @pre_match, @match and @post_match; their concatenation is returned.
def fix
  @pre_match = fix_part(0, @start_offset - 1)
  @match = fix_part(@start_offset, @end_offset - 1)
  @post_match = fix_part(@end_offset, @results.size - 1)

  [@pre_match, @match, @post_match].reduce(:+)
end
fix_part(start_offset, end_offset) click to toggle source

Fixes part of results

# File lib/regextest/back/result.rb, line 529
# Concatenates random_fix of every element from start_offset through
# end_offset (both inclusive). Yields "" when end_offset lies before
# start_offset.
def fix_part(start_offset, end_offset)
  (start_offset..end_offset).reduce("") do |text, index|
    text + @results[index].random_fix
  end
end
is_begin_anchor?() click to toggle source

return true if begin anchor exists

# File lib/regextest/back/result.rb, line 502
# Truthy when the first recorded string-begin or line-begin anchor sits at
# offset 0; nil / false otherwise.
def is_begin_anchor?
  string_begin = @positional_anchors[:CMD_ANC_STRING_BEGIN]
  line_begin = @positional_anchors[:CMD_ANC_LINE_BEGIN]

  (string_begin && string_begin[0] == 0) || (line_begin && line_begin[0] == 0)
end
is_end_anchor?() click to toggle source

return true if end anchor exists

# File lib/regextest/back/result.rb, line 510
# Truthy when the last recorded offset of a string-end, string-end2 or
# line-end anchor equals the current number of elements; nil / false otherwise.
def is_end_anchor?
  tail = @results.size
  string_end = @positional_anchors[:CMD_ANC_STRING_END]
  string_end2 = @positional_anchors[:CMD_ANC_STRING_END2]
  line_end = @positional_anchors[:CMD_ANC_LINE_END]

  (string_end && string_end[-1] == tail) ||
    (string_end2 && string_end2[-1] == tail) ||
    (line_end && line_end[-1] == tail)
end
merge() click to toggle source

Merge results of look aheads / behinds

# File lib/regextest/back/result.rb, line 88
# Merges the queued look-aheads, then the queued look-behinds. Look-behinds
# are only attempted when the look-ahead merge returned a truthy value; the
# result of the last merge that ran is returned.
def merge
  ahead = merge_look_ahead
  return ahead unless ahead

  merge_look_behind
end
merge_anchors(offset, sub_results) click to toggle source

Merge anchors

# File lib/regextest/back/result.rb, line 288
# Folds the anchors recorded on sub_results into this result: every
# sub-result offset is shifted by +offset+ and united (set union, so no
# duplicates are added) with the offsets already known for that command.
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each do |cmd, sub_offsets|
    known = (@positional_anchors[cmd] ||= [])
    @positional_anchors[cmd] = known | sub_offsets.map { |position| position + offset }
  end
end
merge_look_ahead() click to toggle source

Merge results of look aheads

# File lib/regextest/back/result.rb, line 94
# Merges every queued look-ahead in order: its anchors first, then its
# elements (positive or negative variant depending on the command).
#
# Returns nil as soon as one element merge fails, true when all succeed.
# Raises RuntimeError for a queued command that is not a look-ahead.
def merge_look_ahead
  @look_aheads.each do |pending|
    offset, sub_results, command = pending.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD
        merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD
        merge_not_look_ahead_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end
merge_look_ahead_elems(offset, sub_results) click to toggle source

Merge each elements of look aheads

# File lib/regextest/back/result.rb, line 118
# Lays the look-ahead elements over the element list starting at +offset+.
# Where an element already exists it is intersected with the look-ahead
# element; positions past the current end are filled by appending the
# look-ahead element itself.
#
# Returns nil as soon as an intersect call returns a falsy value, else true.
def merge_look_ahead_elems(offset, sub_results)
  sub_results.size.times do |step|
    position = offset + step
    sub_elem = sub_results[step]

    if position >= @results.size
      # past the current end: the look-ahead element becomes a new element
      @results.push(sub_elem)
    elsif !@results[position].intersect(sub_elem)
      return nil
    end
  end
  true
end
merge_look_behind() click to toggle source

Merge results of look behind

# File lib/regextest/back/result.rb, line 189
# Merges every queued look-behind in order: its anchors first, then its
# elements (positive or negative variant depending on the command).
#
# Returns nil as soon as one element merge fails, true when all succeed.
# Raises RuntimeError for a queued command that is not a look-behind.
def merge_look_behind
  @look_behinds.each do |pending|
    offset, sub_results, command = pending.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND
        merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND
        merge_not_look_behind_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end
merge_look_behind_elems(offset, sub_results) click to toggle source

Merge each elements of look behinds

# File lib/regextest/back/result.rb, line 213
# Merges the elements of a positive look-behind located at +offset+.
#
# The first |offset - sub_results.end_offset| look-behind elements are
# collected and prepended to the element list; each remaining one is
# intersected with the existing element at (index - that count).
#
# Returns false when unshift_params refuses the shift, nil when an intersect
# call returns a falsy value (nothing is prepended in that case), else true.
#
# NOTE(review): merge_not_look_behind_elems computes its unshift length as
# sub_results.end_offset - offset, the other way round from here — confirm
# which direction is intended.
def merge_look_behind_elems(offset, sub_results)
  unshift_length = offset - sub_results.end_offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  sub_offset = unshift_length.abs
  pre_part = []
  sub_results.end_offset.times do |index|
    sub_elem = sub_results[index]
    if index < sub_offset
      pre_part << sub_elem
    elsif !@results[index - sub_offset].intersect(sub_elem)
      return nil
    end
  end
  @results = pre_part + @results
  true
end
merge_not_look_ahead_elems(offset, sub_results) click to toggle source

Merge each elements of not-look-aheads

# File lib/regextest/back/result.rb, line 138
# Merges a negative look-ahead located at +offset+: it is enough that ONE
# position of the look-ahead span is guaranteed not to match, so candidate
# positions are tried one at a time until one can be excluded.
#
# sub_results is either a Regextest::Back::Result or any object answering
# size and [] (narrow_down_by_reluctant_repeat passes a slice of @results).
#
# Returns true when some position was excluded (the modified working copy
# then replaces @results), false when no candidate position worked.
def merge_not_look_ahead_elems(offset, sub_results)
  # a Result limits the span to its end_offset, anything else to its size
  if Regextest::Back::Result === sub_results
    term_offset = offset + sub_results.end_offset
  else
    term_offset = offset + sub_results.size
  end
  # candidate positions inside the look-ahead, in the order TstShuffle returns
  # (presumably a random permutation — helper is defined outside this view)
  try_order = TstShuffle(sub_results.size.times.to_a)
  found = false
  # exclude, at least, one element
  try_order.each do | j |
    # work on a copy of the list so a failed candidate does not touch @results
    # NOTE(review): dup is shallow — the element objects themselves are shared
    results_work = @results.dup
    cur_offset = offset + j
  
    # puts "offset=#{offset} term_offset=#{term_offset}"
    offset.step(term_offset-1).each do | i |
      sub_elem = sub_results[i-offset]
      
      if i < results_work.size   # it is NOT @end_offset
        if i == cur_offset
          # the chosen position already has an element: try to exclude there
          if(!results_work[i].exclude(sub_elem))
            # this `next` only skips to the next i of the inner loop
            next
          else
            found = true
          end
        else
          # do nothing
        end
      else
        # past the end of the list: the list has to grow by one element
        if i == cur_offset
          # use the reversed look-ahead element when reverse yields one
          if(reverse_work = sub_elem.reverse)
            results_work.push reverse_work
            found = true
          else
            results_work.push(Regextest::Back::Element.any_char)
          end
        else
          results_work.push(Regextest::Back::Element.any_char)
        end
      end
    end
    # first successful candidate wins: commit the working copy and stop
    if found
      @results = results_work
      break
    end
  end
  # pp @results
  # puts "found = #{found}"
  found
end
merge_not_look_behind_elems(offset, sub_results) click to toggle source

Merge each elements of not look behinds

# File lib/regextest/back/result.rb, line 240
# Merges a negative look-behind located at +offset+: candidate positions of
# the look-behind are tried one at a time until one of them can be made not
# to match.
#
# Returns false when unshift_params refuses the shift or when no candidate
# position worked; returns true once a candidate succeeded (the modified
# working copy then replaces @results).
def merge_not_look_behind_elems(offset, sub_results)
  # positive when the look-behind is longer than the room before +offset+
  unshift_length = sub_results.end_offset - offset
  if unshift_length > 0
    if !unshift_params(unshift_length)
      return false
    end
  end
  
  # candidate positions, in the order TstShuffle returns
  # (presumably a random permutation — helper is defined outside this view)
  try_order = TstShuffle(sub_results.size.times.to_a)
  found = false
  # exclude, at least, one element
  try_order.each do | j |
    # work on a copy of the list so a failed candidate does not touch @results
    # NOTE(review): dup is shallow — the element objects themselves are shared
    results_work = @results.dup

    # intersect elems
    # index in results_work that look-behind element 0 is compared against
    results_offset = (unshift_length > 0)?0:(offset-sub_results.end_offset)
    # absolute value of unshift_length: look-behind elements with an index
    # below it are prepended instead of compared
    sub_offset = (unshift_length >=0)?unshift_length:(-unshift_length)
    0.step(sub_results.end_offset-1) do | i |
      sub_elem = sub_results[i]
      
      if i < sub_offset
        # NOTE(review): each unshift puts the new element at the very front,
        # so several prepended elements end up in reverse order — confirm
        if i == j
          results_work.unshift (sub_elem.reverse)
          found = true
        else
          results_work.unshift (Regextest::Back::Element.any_char)
        end
      else
        if i == j
          # the chosen position has an existing element: try to exclude there
          if(!results_work[results_offset+i].exclude(sub_elem))
            # this `next` only skips to the next i of the inner loop
            next
          else
            found = true
          end
        else
          # do nothing
        end
      end
    end
    # first successful candidate wins: commit the working copy and stop
    if found
      @results = results_work
      break
    end
  end
  found
end
narrow_down() click to toggle source

Narrow down candidates by anchors, then by reluctant repeats

# File lib/regextest/back/result.rb, line 309
# Applies the anchor constraints, then the reluctant-repeat constraints.
# The second step only runs when the first returned a truthy value; the
# result of the last step that ran is returned.
def narrow_down
  by_anchors = narrow_down_by_anchors
  return by_anchors unless by_anchors

  narrow_down_by_reluctant_repeat
end
narrow_down_by_anchors() click to toggle source

narrow down candidate by anchors

# File lib/regextest/back/result.rb, line 334
# Checks every recorded positional anchor against the current element list,
# constraining elements (new-line / word / non-word) or growing the list by
# one any-char element where an anchor at the edge needs a neighbour.
#
# Returns false as soon as one anchor cannot be satisfied, true otherwise.
# Raises RuntimeError for an anchor command this method does not handle.
def narrow_down_by_anchors
  @positional_anchors.each do | cmd, offsets |
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      # only satisfiable when every occurrence sits at offset 0
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      # NOTE(review): an anchor one element before the end is accepted here,
      # whereas is_end_anchor? compares against @results.size — confirm
      return false if offsets.min < (@results.size - 1)
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      if min_offset < (@results.size - 1)
        return false
      elsif min_offset == (@results.size - 1)
        # exactly one element may follow, and it has to be a new line
        if @results[min_offset].new_line?
          @results[min_offset].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do | offset |
        if offset == 0
          # ok
        elsif @results[offset-1].new_line?
          # the element before the anchor is narrowed to a new line
          @results[offset-1].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_LINE_END
      offsets.each do | offset |
        if offset == @results.size
          # ok
        elsif @results[offset].new_line?
          # the element at the anchor is narrowed to a new line
          @results[offset].set_new_line
        else
          return false
        end
      end
    when :CMD_ANC_WORD_BOUND
      # Walk the live array by index: unshift_params (called below) shifts the
      # stored offsets in place, and later iterations must see shifted values.
      offsets.each_index do | i |
        offset = offsets[i]
        # handle each distinct offset once; earlier entries are shifted
        # together with this one, so equality still identifies duplicates
        next if offsets.first(i).include?(offset)
        # puts "before offset:#{offset} #{@results}"
        if offset > 0 && offset < @results.size
          if !bound_process(@results[offset-1], @results[offset])
            return false
          end
        elsif @results.size == 0
          # nothing to bound against: create both neighbours
          @results.push(Regextest::Back::Element.any_char)
          @results.push(Regextest::Back::Element.any_char)
          bound_process(@results[0], @results[1])
        elsif offset == @results.size
          if !is_end_anchor?
            # add a following element so the boundary has two sides
            @results.push(Regextest::Back::Element.any_char)
            if !bound_process(@results[-2], @results[-1])
              return false
            end
          elsif !@results[-1].word_elements?
            return false
          end
        elsif offset == 0
          if !is_begin_anchor?
            # add a preceding element; all recorded offsets move by one
            if !unshift_params(1)
              return false
            end
            @results.unshift(Regextest::Back::Element.any_char)
            if !bound_process(@results[0], @results[1])
              return false
            end
          elsif !@results[0].word_elements?
            return false
          end
        end
      end
    when :CMD_ANC_WORD_UNBOUND
      # same live-index walk as for CMD_ANC_WORD_BOUND
      offsets.each_index do | i |
        offset = offsets[i]
        next if offsets.first(i).include?(offset)
        # puts "before offset:#{offset} #{@results}"
        if offset > 0 && offset < @results.size
          if !unbound_process(@results[offset-1], @results[offset])
            return false
          end
        elsif @results.size == 0
          @results.push(Regextest::Back::Element.any_char)
          @results.push(Regextest::Back::Element.any_char)
          unbound_process(@results[0], @results[1])
        elsif offset == @results.size
          if !is_end_anchor?
            @results.push(Regextest::Back::Element.any_char)
            if !unbound_process(@results[-2], @results[-1])
              return false
            end
          elsif @results[-1].word_elements?
            return false
          end
        elsif offset == 0
          if !is_begin_anchor?
            if !unshift_params(1)
              return false
            end
            @results.unshift(Regextest::Back::Element.any_char)
            if !unbound_process(@results[0], @results[1])
              return false
            end
          elsif @results[0].word_elements?
            return false
          end
        end
      end
    when :CMD_ANC_LOOK_BEHIND2
      # the match starts at the largest recorded offset of this anchor
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  return true
end
narrow_down_by_reluctant_repeat() click to toggle source

narrow down candidate by reluctant repeat

# File lib/regextest/back/result.rb, line 315
# Applies each recorded reluctant repeat as a negative look-ahead: the
# elements that follow the repeat (from its second recorded offset to the end
# of the list) are passed to merge_not_look_ahead_elems for every start
# offset from the repeat's first offset up to (second offset minus the
# number of following elements).
#
# Returns false as soon as one of those merges fails, true otherwise.
# Repeats with nothing after them are skipped.
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each_value do |offsets|
    begin_at, end_at = offsets[0], offsets[1]
    succeed_part = @results[end_at..-1]
    next if succeed_part.empty?

    # reluctant repeat is equivalent to not_look_ahead!
    last_start = end_at - succeed_part.size
    (begin_at..last_start).each do |offset|
      return false unless merge_not_look_ahead_elems(offset, succeed_part)
    end
  end
  true
end
push_body(elem) click to toggle source

Adds elem

# File lib/regextest/back/result.rb, line 31
# Appends +elem+ to the element list and advances the end offset by one.
# Returns the new end offset.
def push_body(elem)
  @results << elem
  @end_offset += 1
end
size() click to toggle source

size of results

# File lib/regextest/back/result.rb, line 42
# Number of elements currently held in the element list.
def size
  @results.length
end
unbound_process(elem1, elem2) click to toggle source

Non-word-boundary process (\B)

# File lib/regextest/back/result.rb, line 477
# Forces the two neighbouring elements onto the SAME side (both word or both
# non-word): whichever element is already decided pulls the other one along
# (elem1 is consulted first). When neither is decided, TstRand(2) picks the
# side for both.
#
# Returns false when either element reports empty? afterwards, true otherwise.
def unbound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_word_elements
  elsif elem1.non_word_elements?
    elem2.set_non_word_elements
  elsif elem2.word_elements?
    elem1.set_word_elements
  elsif elem2.non_word_elements?
    elem1.set_non_word_elements
  elsif TstRand(2) == 0
    elem1.set_word_elements
    elem2.set_word_elements
  else
    elem1.set_non_word_elements
    elem2.set_non_word_elements
  end
  !elem1.empty? && !elem2.empty?
end
unshift_params(unshift_length) click to toggle source

unshift parameters

# File lib/regextest/back/result.rb, line 296
# Shifts every stored offset by +unshift_length+, to be used when that many
# elements are inserted in front of the element list: queued look-ahead and
# look-behind offsets, all positional-anchor offsets, and the start / end
# offsets.
#
# Returns false — without changing anything — when a CMD_ANC_STRING_BEGIN
# anchor has been recorded; returns true after a successful shift.
#
# NOTE(review): @reluctant_repeat / @possessive_repeat offsets are not
# shifted here — confirm whether they should be.
def unshift_params(unshift_length)
  # Refuse up front so a rejected call never leaves offsets half-shifted.
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  @look_aheads.each { |elem| elem[:offset] += unshift_length }
  @look_behinds.each { |elem| elem[:offset] += unshift_length }
  # map! keeps the same array objects, so holders of these arrays see the shift
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end