class SomethingLikeThat::Scorer
Computes similarity scores for asymmetrical comparisons of string phrases.
Attributes
mean_exponent[R]
threshold[R]
query_matchers[R]
Public Class Methods
generalized_mean(numbers, p = mean_exponent)
click to toggle source
# File lib/something_like_that/scorer.rb, line 27
# Computes the generalized (power) mean of +numbers+ with exponent +p+,
# i.e. <tt>(sum(x**p) / n)**(1.0 / p)</tt>.
#
# numbers:: collection of numeric values (must respond to +length+ and
#           +inject+).
# p::       exponent of the mean; defaults to +mean_exponent+.
#
# Returns a Float. An empty +numbers+ yields NaN (0.0 / 0). With p == 0 a
# negative value raises Math::DomainError (logarithm of a negative number).
def generalized_mean(numbers, p = mean_exponent)
  count = numbers.length

  # The formula is undefined at p == 0 (1.0 / p is infinite, so the plain
  # expression collapses to 1.0 for every input). The limit of the power
  # mean there is the geometric mean, computed here in log space.
  if p.zero?
    log_total = numbers.inject(0.0) { |acc, x| acc + Math.log(x) }
    return Math.exp(log_total / count)
  end

  # Coerce to Float before exponentiation: Integer 0 raised to a negative
  # Integer power raises ZeroDivisionError, whereas 0.0**-1 is Infinity and
  # the resulting mean tends to 0.0 as expected.
  power_total = numbers.inject(0.0) { |acc, x| acc + x.to_f**p }
  (power_total / count)**(1.0 / p)
end
mean_exponent=(p)
click to toggle source
# File lib/something_like_that/scorer.rb, line 11
# Stores +p+ in <tt>@mean_exponent</tt>.
#
# p:: the new exponent; must be an Integer.
#
# Raises TypeError when +p+ is not an Integer.
def mean_exponent=(p)
  # TODO: add explanation in rdoc documentation?
  raise TypeError, 'mean_exponent must be an integer' unless p.is_a?(Integer)

  @mean_exponent = p
end
new(query, matcher = Amatch::JaroWinkler)
click to toggle source
# File lib/something_like_that/scorer.rb, line 34
# Builds one matcher per token of +query+ and keeps them in
# <tt>@query_matchers</tt>.
#
# query::   object responding to +tokens+.
# matcher:: class instantiated once per token via <tt>new(token)</tt>;
#           defaults to Amatch::JaroWinkler.
def initialize(query, matcher = Amatch::JaroWinkler)
  query_tokens = query.tokens
  @query_matchers = query_tokens.map do |query_token|
    matcher.new(query_token)
  end
end
threshold=(threshold)
click to toggle source
# File lib/something_like_that/scorer.rb, line 19
# Stores +threshold+ in <tt>@threshold</tt>.
#
# threshold:: the new cutoff; must lie between 0 and 1 (both inclusive).
#
# Raises RangeError when +threshold+ is outside that interval.
def threshold=(threshold)
  # TODO: add explanation in rdoc documentation?
  raise RangeError, 'threshold must be between 0 and 1' unless threshold.between?(0, 1)

  @threshold = threshold
end
Public Instance Methods
match?(candidate)
click to toggle source
# File lib/something_like_that/scorer.rb, line 42
# Reports whether +candidate+ scores strictly above the class-level
# threshold.
#
# candidate:: passed unchanged to +score+.
#
# Returns the result of <tt>score(candidate) > self.class.threshold</tt>.
def match?(candidate)
  candidate_score = score(candidate)
  candidate_score > self.class.threshold
end
score(candidate)
click to toggle source
# File lib/something_like_that/scorer.rb, line 38
# Scores +candidate+ against the query.
#
# candidate:: object responding to +tokens+.
#
# Returns +average+ applied to the +top_scores+ of the candidate's tokens.
def score(candidate)
  best_scores = top_scores(candidate.tokens)
  average(best_scores)
end
Private Instance Methods
apply_threshold(score)
click to toggle source
# File lib/something_like_that/scorer.rb, line 61
# Zeroes out scores that do not exceed the class-level threshold.
#
# score:: numeric score to filter.
#
# Returns +score+ when it is strictly greater than
# <tt>self.class.threshold</tt>, otherwise the Integer 0.
def apply_threshold(score)
  return score if score > self.class.threshold

  0
end
average(scores)
click to toggle source
# File lib/something_like_that/scorer.rb, line 65
# Averages +scores+ by delegating to the class-level +generalized_mean+
# (called with +scores+ only, so its default exponent applies).
#
# scores:: collection of numeric scores.
def average(scores)
  scorer_class = self.class
  scorer_class.generalized_mean(scores)
end
tokenwise_score_table(candidate_tokens)
click to toggle source
# File lib/something_like_that/scorer.rb, line 53
# Builds a table of thresholded match scores: one row per entry of
# +query_matchers+, one column per candidate token.
#
# candidate_tokens:: collection of tokens handed to each matcher's +match+.
#
# Returns an Array of rows; each cell is
# <tt>apply_threshold(matcher.match(token))</tt>.
def tokenwise_score_table(candidate_tokens)
  query_matchers.map do |query_matcher|
    candidate_tokens.map { |candidate_token| apply_threshold(query_matcher.match(candidate_token)) }
  end
end
top_scores(candidate_tokens, max_finder = TwoDArray)
click to toggle source
# File lib/something_like_that/scorer.rb, line 48
# Reduces the token-wise score table for +candidate_tokens+ to its maxima.
#
# candidate_tokens:: tokens passed to +tokenwise_score_table+.
# max_finder::       class instantiated with the score table; its instance
#                    must respond to +maxima+. Defaults to TwoDArray.
#
# Returns whatever <tt>max_finder.new(table).maxima</tt> returns.
def top_scores(candidate_tokens, max_finder = TwoDArray)
  finder = max_finder.new(tokenwise_score_table(candidate_tokens))
  finder.maxima
end