class Elastic::Stats::NaiveBayes::Predictor
Utility to perform Naive Bayes category predictions on text
Attributes
adjust[W]
prior_set[R]
Public Class Methods
new(prior_set)
click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 14 def initialize(prior_set) @prior_set = prior_set end
Public Instance Methods
guess(subject)
click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 18
#
# Scores +subject+ against every category key exposed by the prior set and
# returns the results ranked from the highest score to the lowest.
#
# @param subject [Object] the input to classify; handed to #score untouched
# @return [Hash] category => score, ordered by descending score
def guess(subject)
  by_category = prior_set.categories.keys.each_with_object({}) do |category, acc|
    acc[category] = score(subject, category)
  end

  # Negating the value makes the ascending sort yield the best match first.
  by_category.sort_by { |_category, value| -value }.to_h
end
score(subject, category)
click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 26
#
# Computes a single score for +subject+ within +category+.
#
# @param subject [Object] the input to score; tokenized via #tokenize
# @param category [Object] the category passed to TokenStats#bayes
# @return [Float] 1 / (1 + e**log_sum), where log_sum is the total of the
#   per-token bayes values (0.5 when there are no tokens)
def score(subject, category)
  # Add up the per-token probability term for this category.
  log_sum = 0
  tokenize(subject).each do |token|
    log_sum += TokenStats.new(token, prior_set).bayes(category)
  end

  # Math.exp returns a Float, so this division is never integer division.
  1 / (1 + Math.exp(log_sum))
end
tokenize(subject)
click to toggle source
# File lib/elastic/stats/naive-bayes/predictor.rb, line 36
#
# Returns the tokens for +subject+, delegating to the prior set's tokenizer
# and remembering the result so each distinct subject is tokenized only once
# per predictor instance.
#
# @param subject [Object] the input to tokenize; also used as the cache key
# @return [Object] whatever +prior_set.tokenize+ returned for this subject
def tokenize(subject)
  # The hash's default block fills the cache on the first lookup of a key.
  @tokenize ||= Hash.new do |cache, text|
    cache[text] = prior_set.tokenize(text)
  end
  @tokenize[subject]
end