class LevenshteinRb::LevenshteinDistance

Attributes

d[R]
m[R]
n[R]
s[R]
t[R]

Public Class Methods

new(a_string, another_string) click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 27
def initialize(a_string, another_string)
  @s, @t = a_string, another_string
  @m, @n = s.length.to_i, t.length.to_i
  @d     = RecurrenceMatrix.new(m, n) # d is simply the default notation for a recurrence matrix in literature
end

Public Instance Methods

to_i() click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 33
def to_i
  return m if n == 0
  return n if m == 0

  (1..n).each do |j|
    (1..m).each do |i|
      d[i][j] = costs_for_step(i, j)
    end
  end

  d[m][n]
end

Private Instance Methods

costs_for_step(i, j) click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 61
def costs_for_step(i, j)
  if same_character_for_both_words_is_added?(i, j)
    d[i-1][j-1]
  else
    obtain_minimal_value_from_neighbors(i, j) + 1
  end
end
obtain_minimal_value_from_neighbors(i, j) click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 53
def obtain_minimal_value_from_neighbors(i, j)
  [
    d[i-1][j], # Look left in the matrix. This would be a "deletion". It costs + 1 more than d[i-1][j]]
    d[i][j-1], # Look directly above the current entry in matrix. This would correspond to an insertion which costs +1 additionally
    d[i-1][j-1], # Completely substitute the character, this also costs +1 more  than d[j-1, i-1]
  ].min
end
same_character_for_both_words_is_added?(i, j) click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 49
def same_character_for_both_words_is_added?(i, j)
  s[i-1] == t[j-1]
end