class Elastic::Stats::NaiveBayes::Predictor

Utility to perform Naive Bayes category predictions on text

Attributes

adjust[W]
prior_set[R]

Public Class Methods

new(prior_set) click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 14
# Builds a predictor around +prior_set+.
#
# The object is stored as-is and exposed through the +prior_set+ reader.
# Elsewhere in this class it is asked for +categories+ (whose +keys+ are the
# category labels) and for +tokenize+, and it is handed to TokenStats.new.
def initialize(prior_set)
  @prior_set = prior_set
end

Public Instance Methods

guess(subject) click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 18
# Scores +subject+ against every category known to the prior set.
#
# Returns a Hash that maps each key of +prior_set.categories+ to the value
# returned by #score for that category, ordered from the highest score to
# the lowest.
def guess(subject)
  by_category = prior_set.categories.keys.each_with_object({}) do |category, acc|
    acc[category] = score(subject, category)
  end

  # Negating the value makes the ascending sort yield a descending ranking.
  by_category.sort_by { |_category, value| -value }.to_h
end
score(subject, category) click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 26
# Computes the score of +subject+ for a single +category+.
#
# Each token produced by #tokenize is wrapped in a TokenStats built from the
# token and the prior set, and the values of its +bayes(category)+ are added
# together. The total is then mapped through 1 / (1 + e**total), so a
# subject with no tokens scores 0.5.
def score(subject, category)
  # Accumulate the per-token contribution for this category.
  log_sum = 0
  tokenize(subject).each do |token|
    log_sum += TokenStats.new(token, prior_set).bayes(category)
  end

  denominator = 1 + Math.exp(log_sum)
  1 / denominator
end
tokenize(subject) click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 36
# Returns the tokens for +subject+ as produced by +prior_set.tokenize+.
#
# Results are memoized per subject in a Hash held in +@tokenize+, so the
# prior set is only asked to tokenize a given subject once per predictor.
def tokenize(subject)
  @tokenize ||= Hash.new do |cache, text|
    # Cache miss: compute once and store under the requested key.
    cache[text] = prior_set.tokenize(text)
  end
  @tokenize[subject]
end