class SomethingLikeThat::Scorer

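Computes similarity scores for asymmetric comparison of string phrases: each query token is matched against every candidate token, per-token scores that do not exceed the threshold are zeroed, and the top scores are combined with a generalized mean.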

Attributes

mean_exponent[R]
threshold[R]
query_matchers[R]

Public Class Methods

generalized_mean(numbers, p = mean_exponent) click to toggle source
# File lib/something_like_that/scorer.rb, line 27
# Computes the generalized (power) mean of +numbers+ with exponent +p+.
#
# p = 1 gives the arithmetic mean, p = -1 the harmonic mean, and p = 0 is
# treated as its limiting case, the geometric mean.
#
# numbers:: collection of numeric scores (expected to be non-negative)
# p::       exponent of the mean; defaults to +mean_exponent+
#
# Returns a Float. An empty collection yields NaN.
def generalized_mean(numbers, p = mean_exponent)
  count = numbers.length
  return Float::NAN if count.zero?

  # Work in floats throughout: Integer 0 raised to a negative power raises
  # ZeroDivisionError, whereas 0.0**-1 is Infinity and the mean then
  # collapses to 0.0, which is the correct limit.
  values = numbers.map(&:to_f)

  # x**0 is always 1, so the general formula degenerates at p = 0; use the
  # geometric mean, which is the limit of the power mean as p approaches 0.
  return values.reduce(:*)**(1.0 / count) if p.zero?

  (values.map { |x| x**p }.reduce(:+) / count)**(1.0 / p)
end
mean_exponent=(p) click to toggle source
# File lib/something_like_that/scorer.rb, line 11
# Sets the exponent used by the generalized mean.
#
# p:: the new exponent; must be an Integer
#
# Raises TypeError when +p+ is not an Integer.
# TODO: add explanation in rdoc documentation?
def mean_exponent=(p)
  raise TypeError, 'mean_exponent must be an integer' unless p.is_a?(Integer)

  @mean_exponent = p
end
new(query, matcher = Amatch::JaroWinkler) click to toggle source
# File lib/something_like_that/scorer.rb, line 34
# Builds one matcher per token of +query+ and stores them in
# @query_matchers.
#
# query::   object responding to +tokens+
# matcher:: class (or any object responding to +new+) instantiated once
#           per query token
def initialize(query, matcher = Amatch::JaroWinkler)
  query_tokens = query.tokens
  @query_matchers = query_tokens.map do |word|
    matcher.new(word)
  end
end
threshold=(threshold) click to toggle source
# File lib/something_like_that/scorer.rb, line 19
# Sets the score threshold.
#
# threshold:: the new threshold; must lie between 0 and 1 inclusive
#
# Raises RangeError when +threshold+ is outside that interval.
# TODO: add explanation in rdoc documentation?
def threshold=(threshold)
  raise RangeError, 'threshold must be between 0 and 1' unless threshold.between?(0, 1)

  @threshold = threshold
end

Public Instance Methods

match?(candidate) click to toggle source
# File lib/something_like_that/scorer.rb, line 42
# Reports whether +candidate+ scores strictly above the class-level
# threshold. A score exactly equal to the threshold is not a match.
def match?(candidate)
  candidate_score = score(candidate)
  cutoff = self.class.threshold
  candidate_score > cutoff
end
score(candidate) click to toggle source
# File lib/something_like_that/scorer.rb, line 38
# Scores +candidate+ against the query: takes the top scores for the
# candidate's tokens and averages them.
#
# candidate:: object responding to +tokens+
def score(candidate)
  candidate_tokens = candidate.tokens
  best = top_scores(candidate_tokens)
  average(best)
end

Private Instance Methods

apply_threshold(score) click to toggle source
# File lib/something_like_that/scorer.rb, line 61
# Zeroes out any score that does not strictly exceed the class-level
# threshold; scores above it pass through unchanged.
def apply_threshold(score)
  return 0 unless score > self.class.threshold

  score
end
average(scores) click to toggle source
# File lib/something_like_that/scorer.rb, line 65
# Averages +scores+ by delegating to the class-level generalized_mean,
# which is called with +scores+ only (no explicit exponent).
def average(scores)
  scorer = self.class
  scorer.generalized_mean(scores)
end
tokenwise_score_table(candidate_tokens) click to toggle source
# File lib/something_like_that/scorer.rb, line 53
# Builds the table of thresholded scores: one row per query matcher, one
# column per candidate token.
#
# candidate_tokens:: the candidate's tokens, each passed to every
#                    matcher's +match+
def tokenwise_score_table(candidate_tokens)
  score_row = lambda do |query_matcher|
    candidate_tokens.map { |word| apply_threshold(query_matcher.match(word)) }
  end

  query_matchers.map(&score_row)
end
top_scores(candidate_tokens, max_finder = TwoDArray) click to toggle source
# File lib/something_like_that/scorer.rb, line 48
# Builds the token-wise score table for +candidate_tokens+ and returns
# whatever +maxima+ the max finder reports for it.
#
# candidate_tokens:: the candidate's tokens
# max_finder::       class instantiated with the score table; its
#                    instances must respond to +maxima+
def top_scores(candidate_tokens, max_finder = TwoDArray)
  max_finder.new(tokenwise_score_table(candidate_tokens)).maxima
end