class LevenshteinRb::LevenshteinDistance
Attributes
d[R]
m[R]
n[R]
s[R]
t[R]
Public Class Methods
new(a_string, another_string)
click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 27 def initialize(a_string, another_string) @s, @t = a_string, another_string @m, @n = s.length.to_i, t.length.to_i @d = RecurrenceMatrix.new(m, n) # d is simply the default notation for a recurrence matrix in literature end
Public Instance Methods
to_i()
click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 33 def to_i return m if n == 0 return n if m == 0 (1..n).each do |j| (1..m).each do |i| d[i][j] = costs_for_step(i, j) end end d[m][n] end
Private Instance Methods
costs_for_step(i, j)
click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 61 def costs_for_step(i, j) if same_character_for_both_words_is_added?(i, j) d[i-1][j-1] else obtain_minimal_value_from_neighbors(i, j) + 1 end end
obtain_minimal_value_from_neighbors(i, j)
click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 53 def obtain_minimal_value_from_neighbors(i, j) [ d[i-1][j], # Look left in the matrix. This would be a "deletion". It costs + 1 more than d[i-1][j]] d[i][j-1], # Look directly above the current entry in matrix. This would correspond to an insertion which costs +1 additionally d[i-1][j-1], # Completely substitute the character, this also costs +1 more than d[j-1, i-1] ].min end
same_character_for_both_words_is_added?(i, j)
click to toggle source
# File lib/levenshtein_rb/levenshtein_distance.rb, line 49 def same_character_for_both_words_is_added?(i, j) s[i-1] == t[j-1] end