class ACT::Trie

Main class for creating Aho-Corasick Trie from array of words of dictionary

Attributes

dictionary[R]

Dictionary attribute

root[R]

Root vertex

trie[R]

Trie attribute

Public Class Methods

new(dictionary) click to toggle source

Initialize new trie and fill it with words from dictionary

Remarks:

  • dictioanry is array of characters

  • if indexing is important array should not be sorted

  • word from sentence may contain spaces and special characters, so one “word” can be the whole sentence

  • word can be an integer, but result will be converted to the string

Example:

>> ACT.new(["abc", "cde", 8, "ad f", "wer\nm"])

Arguments:

dictionary: (Array)
# File lib/act/trie.rb, line 33
def initialize(dictionary)
  @root = ACT::Vertex.new
  @dictionary = dictionary
  @trie = build_trie
end

Public Instance Methods

backtrace_to_word(vertex) click to toggle source

Returns hash with word and indexes at dictionary

  • Ending vertex of chain should be used as argument, it means that it should contain at least one value in the array of end_indexes attribute

Example:

>> vertex = act.trie.get_child('s').get_child('h').get_child('e')
=> #<ACT::Vertex:0x000055cabb2399d0
      @char="e",
      @children=[],
      @end_indexes=[1, 8],
      @parent=
         #<ACT::Vertex:0x000055cabb239ac0
            @char="h",
            @children=[#<ACT::Vertex:0x000055cabb2399d0 ...>],
            @end_indexes=[],
            @parent=
              #<ACT::Vertex:0x000055cabb239bb0
              @char="s",
              @children=[#<ACT::Vertex:0x000055cabb239ac0 ...>],
              ...

Arguments:

vertes: (ACT::Vertex)
# File lib/act/trie.rb, line 83
def backtrace_to_word(vertex)
  if vertex.end_indexes.empty?
    raise 'Argument should be ending vertex of chain, and contain at'\
          'least one value in the array of end_indexes attribute'
  else
    chain = backtrace(vertex)
    {
      word: chain.reduce('') { |acc, v| acc << v.char },
      indexes: chain.last.end_indexes
    }
  end
end
extend_dictionay(dict) click to toggle source

Adds additional words(chains of vertexes) to the trie object

  • Argument should be array of words

Example:

>> act.extend_dictionary(["our", "it", "them"])
# File lib/act/trie.rb, line 101
def extend_dictionay(dict)
  build_trie(dict)
end
parse(text) click to toggle source

Executes text analyze and returns map occurring words with indexes from dictionary Example:

>> act.parse('he their them height have then their shelter')
=> [ {:word=>"he", :indexes=>[0, 5]},
     {:word=>"their", :indexes=>[7]},
     {:word=>"he", :indexes=>[0, 5]},
     {:word=>"he", :indexes=>[0, 5]},
     {:word=>"he", :indexes=>[0, 5]},
     {:word=>"he", :indexes=>[0, 5]},
     {:word=>"their", :indexes=>[7]},
     {:word=>"he", :indexes=>[0, 5]},
     {:word=>"she", :indexes=>[1, 8]},
     {:word=>"he", :indexes=>[0, 5]}]

Arguments:

text: (String)
# File lib/act/trie.rb, line 55
def parse(text)
  text = text.to_s.split('')
  vm = vertex_map(text) { :vertex }
  exec_branches(text, vm).flatten.compact
end

Private Instance Methods

backtrace(vertex) click to toggle source
# File lib/act/trie.rb, line 153
def backtrace(vertex)
  result = !vertex.nil? ? [vertex] : []
  until vertex.parent.nil?
    result << vertex.parent unless vertex.parent.char.nil?
    vertex = vertex.parent
  end
  result.reverse
end
build_trie(dict = @dictionary) click to toggle source
# File lib/act/trie.rb, line 162
def build_trie(dict = @dictionary)
  parent = @root
  dict.each_with_index do |word, index|
    word = word.to_s
    word.to_s.each_char.with_index do |char, char_index|
      end_index = char_index == (word.length - 1) ? index : nil
      @vertex = (char_index.zero? ? @root : parent)
      parent = @vertex.add_child(char, end_index)
    end
  end
  @root
end
end_vertex?(vertex) click to toggle source
# File lib/act/trie.rb, line 149
def end_vertex?(vertex)
  !vertex.end_indexes.empty?
end
exec_branches(text, vertex_map) click to toggle source
# File lib/act/trie.rb, line 107
def exec_branches(text, vertex_map)
  vertex_map.map do |b|
    b[:indexes].map do |index|
      search(b[:key], text[index + 1..-1])
    end
  end
end
search_rest(vertex, text) click to toggle source
# File lib/act/trie.rb, line 126
def search_rest(vertex, text)
  result = []
  text.each do |char|
    current_vertex = vertex.get_child(char)
    break if current_vertex.nil?

    result << backtrace_to_word(current_vertex) if end_vertex?(current_vertex)
    break if current_vertex.children.empty?

    vertex = current_vertex
  end
  result
end
vertex_map(text) { || ... } click to toggle source
# File lib/act/trie.rb, line 140
def vertex_map(text)
  @trie.children.map do |vertex|
    {
      key: vertex.send(yield),
      indexes: text.collect.with_index { |c, i| i if c == vertex.char }.compact
    }
  end
end