class Text::WhiteSimilarity
Ruby implementation of the string similarity described by Simon White at: www.catalysoft.com/articles/StrikeAMatch.html
$$\mathrm{similarity}(s_1, s_2) = \frac{2 \cdot |\mathrm{pairs}(s_1) \cap \mathrm{pairs}(s_2)|}{|\mathrm{pairs}(s_1)| + |\mathrm{pairs}(s_2)|}$$
e.g.
$$\mathrm{similarity}(\mathrm{FRANCE}, \mathrm{FRENCH}) = \frac{2 \cdot |\{FR, NC\}|}{|\{FR, RA, AN, NC, CE\}| + |\{FR, RE, EN, NC, CH\}|} = \frac{2 \cdot 2}{5 + 5} = 0.4$$
WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
Public Class Methods
new()
click to toggle source
# File lib/text/white_similarity.rb, line 32
# Sets up a new comparer with an empty per-instance cache that maps an input
# string to its list of letter pairs.
def initialize
  @word_letter_pairs = {}
end
similarity(str1, str2)
click to toggle source
# File lib/text/white_similarity.rb, line 28
# Convenience class-level entry point: builds a throwaway instance and
# delegates to its #similarity, returning whatever that call returns.
#
# str1, str2 - the two strings to compare.
def self.similarity(str1, str2)
  new.similarity(str1, str2)
end
Public Instance Methods
similarity(str1, str2)
click to toggle source
# File lib/text/white_similarity.rb, line 36
# Scores how alike two strings are as
#   2 * (number of shared letter pairs) / (pairs in str1 + pairs in str2).
# Pairs are matched as a multiset: each pair from str2 can be matched by at
# most one pair from str1.
#
# str1, str2 - the two strings to compare.
#
# Returns a Float between 0.0 and 1.0. When neither string yields any letter
# pair (e.g. both are empty or consist of single-letter words) the ratio is
# undefined; 0.0 is returned instead of NaN.
def similarity(str1, str2)
  pairs1 = word_letter_pairs(str1)
  pairs2 = word_letter_pairs(str2)

  union = pairs1.length + pairs2.length
  # Guard the 0.0 / 0 case, which would otherwise produce NaN.
  return 0.0 if union.zero?

  # Count how many times each pair is still available for matching in str2,
  # so that lookups are O(1) and the cached (frozen) arrays are never mutated.
  remaining = Hash.new(0)
  pairs2.each { |pair| remaining[pair] += 1 }

  intersection = pairs1.count do |pair|
    next false unless remaining[pair] > 0

    remaining[pair] -= 1
    true
  end

  (2.0 * intersection) / union
end
Private Instance Methods
word_letter_pairs(str)
click to toggle source
# File lib/text/white_similarity.rb, line 54
# Returns the adjacent two-character pairs of every whitespace-separated word
# in str, upper-cased and in order of appearance (e.g. "France" gives
# FR, RA, AN, NC, CE). Words shorter than two characters contribute nothing.
#
# The result is memoised per input string in @word_letter_pairs and frozen,
# so callers must not modify the returned array.
def word_letter_pairs(str)
  @word_letter_pairs[str] ||= str.upcase.split(/\s+/).flat_map do |word|
    (0...(word.length - 1)).map { |i| word[i, 2] }
  end.freeze
end