class SuffixTree::Base

en.wikipedia.org/wiki/Ukkonen%27s_algorithm

Constants

UNIQ_IDENTIFIERS

Attributes

active[RW]
end[RW]
input[RW]
l_map[RW]
remaining[RW]
root[RW]
uniq_word_map[RW]
words[RW]

Public Class Methods

new(words) click to toggle source
# File lib/suffix_tree/base.rb, line 10
def initialize(words)
  @words = words
  @input = ''
  @l_map = {}
  @uniq_word_map = {}
  words.each_with_index do |word, index|
    id = UNIQ_IDENTIFIERS[index]
    @uniq_word_map[id] = index
    @input += "#{word}#{id}"
  end
  @input = @input.split('')
  @remaining = 0
end

Public Instance Methods

base_case() click to toggle source
# File lib/suffix_tree/base.rb, line 208
def base_case
  max_length = -1
  answer = ''

  words.each do |word|
    if max_length < word.length
      max_length = word.length
      answer = word
    elsif max_length == word.length
      answer = [answer, word].min
    end
  end
  answer
end
build() click to toggle source
# File lib/suffix_tree/base.rb, line 24
def build
  self.root = Node.new(1, End.new(0), words.count)
  root.index = -1
  self.active = ActivePoint.new(root)
  self.end = End.new(-1)
  input.each_with_index do |_elm, index|
    start_phase(index)
  end

  set_index_dfs(root, 0, input.length)
  set_lvalues_dfs(root)
end
diff(node) click to toggle source
# File lib/suffix_tree/base.rb, line 174
def diff(node)
  node.end.end - node.start
end
longest_common_substring(k = words.length) click to toggle source
# File lib/suffix_tree/base.rb, line 186
def longest_common_substring(k = words.length)
  raise 'Input has to be integer' unless k.is_a? Integer
  raise 'Invalid Input' if k <= 0

  return base_case if k == 1

  max_length = -1
  answer = ''

  l_map.each do |key, v|
    next if key < k

    if v.length > max_length
      max_length = v.length
      answer = v
    elsif v.length == max_length
      answer = [v, answer].min
    end
  end
  answer
end
next_char(i) click to toggle source
# File lib/suffix_tree/base.rb, line 159
def next_char(i)
  node = selected_node
  return input[active.node.child[input[active.edge]].start + active.length] if diff(node) >= active.length

  if diff(node) + 1 == active.length
    return input[i] if node.child[input[i]]
  else
    active.node = node
    active.length = active.length - diff(node) - 1
    active.edge = active.edge + diff(node) + 1
    return next_char(i)
  end
  raise 'End Of Path Reached'
end
select_node(index) click to toggle source
# File lib/suffix_tree/base.rb, line 182
def select_node(index)
  active.node.child[input[index]]
end
selected_node() click to toggle source
# File lib/suffix_tree/base.rb, line 178
def selected_node
  active.node.child[input[active.edge]]
end
set_index_dfs(node, val, size) click to toggle source
# File lib/suffix_tree/base.rb, line 58
def set_index_dfs(node, val, size)
  return unless node

  val += node.end.end - node.start + 1
  if node.index != -1
    node.index = size - val
    c_index = node.index

    c_index += 1 until uniq_word_map[input[c_index]]
    node.c_values[uniq_word_map[input[c_index]]] = 1
    return
  end
  nums = [node.c_values]
  node.child.each do |_key, child|
    set_index_dfs(child, val, size)
    nums << child.c_values
  end
  if node != root
    node.c_values = nums.transpose.map(&:sum).map { |e| e.positive? ? 1 : 0 }
  end
  node.c_value = node.c_values.sum
end
set_lvalues_dfs(node, str = '') click to toggle source
# File lib/suffix_tree/base.rb, line 37
def set_lvalues_dfs(node, str = '')
  return unless node

  return if node.index != -1

  str += input[node.start..node.end.end].join('')
  if l_map[node.c_value]
    if l_map[node.c_value].length < str.length
      l_map[node.c_value] = str
    elsif l_map[node.c_value].length == str.length
      l_map[node.c_value] = [str, l_map[node.c_value]].min
    end
  else
    l_map[node.c_value] = str
  end

  node.child.each do |_key, child|
    set_lvalues_dfs(child, str)
  end
end
start_phase(index) click to toggle source
# File lib/suffix_tree/base.rb, line 81
def start_phase(index)
  last_created_internal_node = nil
  self.end.end += 1
  self.remaining += 1

  while remaining.positive?
    if active.length.zero?
      if select_node(index)
        active.edge = select_node(index).start
        active.length += 1
        break
      else
        root.child[input[index]] = Node.new(index, self.end, words.count)
        self.remaining -= 1
      end
    else
      begin
        char = next_char(index)
        if char == input[index]
          last_created_internal_node.suffix_link = selected_node if last_created_internal_node
          walk_down(index)
          break
        else
          node = selected_node
          temp_start = node.start
          node.start = node.start + active.length
          new_internal_node = Node.new(temp_start, End.new(temp_start + active.length - 1), words.count)

          new_leaf_node = Node.new(index, self.end, words.count)

          new_internal_node.child[input[new_internal_node.start + active.length]] = node
          new_internal_node.child[input[index]] = new_leaf_node
          new_internal_node.index = -1
          active.node.child[input[new_internal_node.start]] = new_internal_node

          last_created_internal_node.suffix_link = new_internal_node if last_created_internal_node

          last_created_internal_node = new_internal_node
          new_internal_node.suffix_link = root

          if active.node != root
            active.node = active.node.suffix_link
          else
            active.edge = active.edge + 1
            active.length -= 1
          end
          self.remaining -= 1
        end
      rescue StandardError
        node = selected_node
        node.child[input[index]] = Node.new(index, self.end, words.count)
        last_created_internal_node.suffix_link = node if last_created_internal_node
        last_created_internal_node = node

        if active.node != root
          active.node = active.node.suffix_link
        else
          active.edge = active.edge + 1
          active.length -= 1
        end
        self.remaining -= 1
      end
    end
  end
end
walk_down(index) click to toggle source
# File lib/suffix_tree/base.rb, line 147
def walk_down(index)
  node = selected_node

  if diff(node) < active.length
    active.node = node
    active.length = active.length - diff(node)
    active.edge = node.child[input[index]].start
  else
    active.length += 1
  end
end