class MemDB::Index::SequenceMatch::SequenceIndex

en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm

Public Class Methods

new(pattern) click to toggle source
# File lib/mem_db/index/sequence_match.rb, line 27
def initialize(pattern)
  @pattern = pattern
  @pattern_length = pattern.length
  @bad_char_skip = {}
  @good_suffix_skip = Array.new(@pattern_length, 0)

  init_tables
end

Public Instance Methods

index(seq) click to toggle source
# File lib/mem_db/index/sequence_match.rb, line 36
def index(seq)
  i = @pattern_length - 1

  while i < seq.length
    j = @pattern_length - 1

    while j >= 0 && seq[i] == @pattern[j]
      i -= 1
      j -= 1
    end

    return i + 1 if j.negative?

    bad_skip = @bad_char_skip[seq[i]] || @pattern_length
    good_skip = @good_suffix_skip[j]
    i += good_skip > bad_skip ? good_skip : bad_skip
  end

  -1
end
init_tables() click to toggle source
# File lib/mem_db/index/sequence_match.rb, line 57
def init_tables # rubocop:disable Metrics/AbcSize
  last = @pattern_length - 1

  i = 0
  while i < last
    @bad_char_skip[@pattern[i]] = last - i
    i += 1
  end

  last_prefix = last

  i = last
  while i >= 0
    last_prefix = i + 1 if pattern_suffix?(i + 1)

    @good_suffix_skip[i] = last_prefix + last - i
    i -= 1
  end

  i = 0
  while i < last
    len_suffix = longest_pattern_suffix(i)

    if @pattern[i - len_suffix] != @pattern[last - len_suffix]
      @good_suffix_skip[last - len_suffix] = len_suffix + last - i
    end

    i += 1
  end
end
longest_pattern_suffix(pos) click to toggle source
# File lib/mem_db/index/sequence_match.rb, line 98
def longest_pattern_suffix(pos)
  i = 0

  while i < @pattern.length && i < pos
    break if @pattern[@pattern.length - 1 - i] != @pattern[pos - i]

    i += 1
  end

  i
end
pattern_suffix?(pos) click to toggle source
# File lib/mem_db/index/sequence_match.rb, line 88
def pattern_suffix?(pos)
  i = 0
  while i + pos < @pattern.length
    return false if @pattern[i] != @pattern[i + pos]

    i += 1
  end
  true
end