class Elastic::Stats::NaiveBayes::TokenStats
Provides statistics about a token in a specific set of data.
Attributes
set[R]
token[R]
Public Class Methods
new(token, set)
click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 8
# Builds the statistics view for one token within one data set.
#
# @param token the token the statistics are computed for
# @param set the data set the token is looked up in
def initialize(token, set)
  @set = set
  @token = token
end
Public Instance Methods
bayes(category)
click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 38
# Scores the token against +category+ as log((1 - p) / p), where p is the
# smoothed share of probability(category) within
# probability(category) + inverse(category).
#
# @param category the category to score the token against
# @return [Numeric] 0 when the token count is 0 or when both probabilities
#   are 0; otherwise Math.log(1 - p) - Math.log(p)
def bayes(category)
  return 0 if count == 0

  in_category = probability(category)
  combined = in_category + inverse(category)
  return 0 if combined == 0

  # log_protect moves a ratio of exactly 0 or 1 just inside the interval so
  # both logarithms below stay finite.
  ratio = log_protect(in_category / combined)

  # adjust returns the smoothed value instead of mutating its argument, so
  # the result has to be captured for the smoothing to take effect.
  smoothed = adjust(ratio)

  Math.log(1 - smoothed) - Math.log(smoothed)
end
categories()
click to toggle source
Returns the categories associated with the token in the set as a Hash
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 19
# Looks up this token's entry in the set's token-to-categories table.
#
# @return the value stored under +token+ in +set.token_categories+
def categories
  by_token = set.token_categories
  by_token[token]
end
count()
click to toggle source
Returns the number of documents that contain the token
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 14
# Looks up this token's entry in the set's token table.
#
# @return the value stored under +token+ in +set.tokens+
def count
  token_table = set.tokens
  token_table[token]
end
inverse(category)
click to toggle source
Returns the inverse probability for the category: the token's count outside the category divided by the set's count outside the category
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 31
# Ratio of the token's count outside +category+ to the set's count outside
# +category+.
#
# @param category the category to exclude
# @return [Numeric] 0 when the token has no entry for +category+ or when the
#   set has nothing outside +category+; otherwise a Float ratio
def inverse(category)
  return 0 unless categories.key?(category)

  # Shared by the zero check and the division below.
  outside_total = set.count - set.categories[category]
  return 0 if outside_total == 0

  (count - categories[category]) / outside_total.to_f
end
probability(category)
click to toggle source
Returns the probability that a token is in the specified category
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 24
# Ratio of the token's count within +category+ to the set's total for
# +category+.
#
# @param category the category to measure
# @return [Numeric] 0 when the token has no entry for +category+ or when the
#   set's total for +category+ is 0; otherwise a Float ratio
def probability(category)
  return 0 unless categories.key?(category)

  # Shared by the zero check and the division below.
  category_total = set.categories[category]
  return 0 if category_total == 0

  categories[category] / category_total.to_f
end
Private Instance Methods
adjust(probability, weight = 1, target = 0.5)
click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 50
# Pulls a raw probability toward +target+: the raw value is weighted by the
# token's +count+ and the target by +weight+.
#
# @param probability [Numeric] raw probability to smooth
# @param weight [Numeric] strength given to +target+ (default 1)
# @param target [Numeric] value the result is pulled toward (default 0.5)
# @return [Numeric] weighted average of +target+ and +probability+; +target+
#   itself when +weight+ and +count+ sum to 0
def adjust(probability, weight = 1, target = 0.5)
  # Normalise by the total weight so the result stays a true weighted average
  # for any +weight+; for the default weight of 1 this equals 1 + count.
  total_weight = weight + count
  return target if total_weight == 0

  ((weight * target) + (count * probability)) / total_weight
end
log_protect(probability)
click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 56
# Replaces a probability of exactly 0 or exactly 1 with a value just inside
# the interval; every other value passes through untouched.
#
# @param probability [Numeric] value to protect
# @return [Numeric] 0.0001 for 0, 0.9999 for 1, otherwise +probability+
def log_protect(probability)
  if probability == 0
    0.0001
  elsif probability == 1
    0.9999
  else
    probability
  end
end