module JaroWinkler
Constants
- DEFAULT_ADJ_TABLE
- DEFAULT_OPTIONS
- DEFAULT_THRESHOLD
- DEFAULT_WEIGHT
- VERSION
Public Class Methods
distance(*args)
click to toggle source
/*
 * Listed in the documentation as the public class method `distance(*args)`.
 *
 * Hands argc, argv and self straight to `distance`, together with
 * `jaro_winkler_distance_from_codes` as the fourth argument, and returns
 * whatever `distance` returns.
 * NOTE(review): the fourth argument is presumably the scoring callback --
 * confirm against the definition of `distance`, which is not visible here.
 */
VALUE
rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self)
{
  VALUE result = distance(argc, argv, self, jaro_winkler_distance_from_codes);

  return result;
}
jaro_distance(*args)
click to toggle source
/*
 * Listed in the documentation as the public class method
 * `jaro_distance(*args)`.
 *
 * Hands argc, argv and self straight to `distance`, together with
 * `jaro_distance_from_codes` as the fourth argument, and returns whatever
 * `distance` returns.
 * NOTE(review): the fourth argument is presumably the scoring callback --
 * confirm against the definition of `distance`, which is not visible here.
 */
VALUE
rb_jaro_distance(size_t argc, VALUE *argv, VALUE self)
{
  VALUE result = distance(argc, argv, self, jaro_distance_from_codes);

  return result;
}
Private Class Methods
_distance(codes1, codes2, options = {})
click to toggle source
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 28
#
# Computes the Jaro-Winkler score of two codepoint arrays: the Jaro score
# from _jaro_distance, boosted by the length of the common prefix.
#
# codes1, codes2 - Arrays of codepoints to compare.
# options        - Hash merged over DEFAULT_OPTIONS[:jaro_winkler]. Keys read
#                  here: :weight and :threshold. The merged hash is also
#                  passed on to _jaro_distance.
#
# Returns the plain Jaro score when it is below options[:threshold],
# otherwise jaro + prefix * weight * (1 - jaro).
# Raises InvalidWeightError when options[:weight] is greater than 0.25.
def _distance(codes1, codes2, options = {})
  options = DEFAULT_OPTIONS[:jaro_winkler].merge options
  # The prefix is capped at 4 below, so a weight above 0.25 would let
  # prefix * weight exceed 1 and push the result past 1.0.
  raise InvalidWeightError if options[:weight] > 0.25

  jaro_distance = _jaro_distance(codes1, codes2, options)
  return jaro_distance if jaro_distance < options[:threshold]

  # Keep the shorter sequence in codes1 so the prefix scan cannot run past
  # the end of either array.
  codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
  max_prefix = [codes1.length, 4].min

  # NOTE: when :ignore_case is set, _jaro_distance has already upcased both
  # arrays in place (it uses map!), so this comparison sees the folded codes.
  prefix = 0
  prefix += 1 while prefix < max_prefix && codes1[prefix] == codes2[prefix]

  jaro_distance + prefix * options[:weight] * (1 - jaro_distance)
end
_jaro_distance(codes1, codes2, options = {})
click to toggle source
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 46
#
# Computes the Jaro similarity of two codepoint arrays.
#
# codes1, codes2 - Arrays of Integer codepoints. When options[:ignore_case]
#                  is truthy both arrays are modified IN PLACE: codes
#                  97..122 (ASCII a-z) are shifted down by 32.
# options        - Hash merged over DEFAULT_OPTIONS[:jaro]. Keys read here:
#                  :ignore_case and :adj_table.
#
# Returns a Float; 0.0 when either array is empty or no character matches.
def _jaro_distance(codes1, codes2, options = {})
  options = DEFAULT_OPTIONS[:jaro].merge options

  # Keep the shorter sequence in codes1, so len1 <= len2 from here on.
  codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
  len1 = codes1.length
  len2 = codes2.length
  return 0.0 if len1 == 0 || len2 == 0

  if options[:ignore_case]
    # map! (not map) is deliberate: _distance compares the common prefix on
    # these same array objects after this method returns.
    codes1.map! { |c| c.between?(97, 122) ? c - 32 : c }
    codes2.map! { |c| c.between?(97, 122) ? c - 32 : c }
  end

  window = [len2 / 2 - 1, 0].max

  # flags1 / flags2 are Integer bitmasks: bit n is set once position n of
  # the corresponding array has been paired with a match.
  flags1 = 0
  flags2 = 0

  # Count matching characters: each codes1[i] takes the first still-unused
  # equal code inside its window of codes2.
  match_count = 0
  len1.times do |i|
    left = [i - window, 0].max
    right = [i + window, len2 - 1].min
    (left..right).each do |j|
      next unless flags2[j] == 0 && codes1[i] == codes2[j]

      flags1 |= (1 << i)
      flags2 |= (1 << j)
      match_count += 1
      break
    end
  end
  return 0.0 if match_count == 0

  # Count transpositions: walk the matched positions of both arrays in
  # order and count the pairs whose codes differ.
  transposition_count = 0
  k = 0
  len1.times do |i|
    next unless flags1[i] == 1

    j = k
    j += 1 while j < len2 && flags2[j] != 1
    k = j + 1 if j < len2
    transposition_count += 1 if codes1[i] != codes2[j]
  end

  # Count similarities among the unmatched characters (adjusting table).
  similar_count = 0
  if options[:adj_table] && len1 > match_count
    len1.times do |i|
      next unless flags1[i] == 0

      char1 = codes1[i].chr(Encoding::UTF_8)
      len2.times do |j|
        next unless flags2[j] == 0

        if DEFAULT_ADJ_TABLE[char1][codes2[j].chr(Encoding::UTF_8)]
          similar_count += 3
          break
        end
      end
    end
  end

  m = match_count.to_f
  # Both operands are Integers, so this is integer (floor) division.
  t = transposition_count / 2
  m = similar_count / 10.0 + m if options[:adj_table]
  (m / len1 + m / len2 + (m - t) / m) / 3
end
validate!(str1, str2)
click to toggle source
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 131
#
# Checks that both arguments are Strings.
#
# str1, str2 - The objects to check.
#
# Returns nil when both are Strings.
# Raises TypeError (with a message naming the classes received) otherwise.
def validate!(str1, str2)
  return if str1.is_a?(String) && str2.is_a?(String)

  raise TypeError, "expected two Strings, got #{str1.class} and #{str2.class}"
end