class Elastic::Stats::NaiveBayes::TokenStats

Provides statistics about a token in a specific set of data

Attributes

set[R]
token[R]

Public Class Methods

new(token, set) click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 8
# Stores the token and the data set it is measured against.
#
# +token+:: the token whose statistics are reported
# +set+::   the set of data the token is looked up in
def initialize(token, set)
  @token, @set = token, set
end

Public Instance Methods

bayes(category) click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 38
# Scores the token for +category+ as a log-odds value.
#
# The ratio probability / (probability + inverse) is kept away from exactly
# 0 and 1 by log_protect, smoothed by adjust, and then turned into
# log(1 - p) - log(p).
#
# +category+:: the category to score the token against
#
# Returns 0 when the token's count is zero or when the probability and the
# inverse probability sum to zero; otherwise returns a Float.
def bayes(category)
  return 0 if count == 0

  in_category = probability(category)
  total = in_category + inverse(category)
  return 0 if total == 0

  # adjust returns a new value rather than mutating its argument, so its
  # result has to be captured; otherwise the smoothing step has no effect.
  calculated = adjust(log_protect(in_category / total))
  Math.log(1 - calculated) - Math.log(calculated)
end
categories() click to toggle source

Returns the categories associated with the token in the set as a Hash

# File lib/elastic/stats/naive-bayes/token_stats.rb, line 19
# Looks up the entry stored for this token in the set's token_categories.
def categories
  per_token = set.token_categories
  per_token[token]
end
count() click to toggle source

Returns the number of documents that contain the token

# File lib/elastic/stats/naive-bayes/token_stats.rb, line 14
# Looks up the value stored for this token in the set's tokens.
def count
  occurrences = set.tokens
  occurrences[token]
end
inverse(category) click to toggle source

Returns the inverse probability that a token is in the category

# File lib/elastic/stats/naive-bayes/token_stats.rb, line 31
# Ratio of the token's occurrences outside +category+ to the set's size
# outside +category+.
#
# +category+:: the category to exclude
#
# Returns 0 when the token has no entry for +category+ or when the set has
# nothing outside +category+; otherwise returns a Float.
def inverse(category)
  return 0 unless categories.key?(category)

  outside_total = set.count - set.categories[category]
  return 0 if outside_total == 0

  outside_hits = count - categories[category]
  outside_hits / outside_total.to_f
end
probability(category) click to toggle source

Returns the probability that a token is in the specified category

# File lib/elastic/stats/naive-bayes/token_stats.rb, line 24
# Ratio of the token's occurrences in +category+ to the set's total for
# +category+.
#
# +category+:: the category to measure
#
# Returns 0 when the token has no entry for +category+ or when the set's
# total for +category+ is zero; otherwise returns a Float.
def probability(category)
  return 0 unless categories.key?(category)

  category_total = set.categories[category]
  return 0 if category_total == 0

  categories[category] / category_total.to_f
end

Private Instance Methods

adjust(probability, weight = 1, target = 0.5) click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 50
# Pulls +probability+ toward +target+ using a weighted average in which
# +target+ counts +weight+ times and +probability+ counts +count+ times:
#
#   (weight * target + count * probability) / (weight + count)
#
# +probability+:: the observed value to smooth
# +weight+::      how strongly +target+ counts (default 1)
# +target+::      the value assumed in the absence of data (default 0.5)
#
# The denominator uses +weight+ rather than a fixed 1, so the result is a
# true weighted average for any weight. The division is forced to Float so
# all-Integer arguments are not truncated.
def adjust(probability, weight = 1, target = 0.5)
  ((weight * target) + (count * probability)) / (weight + count).to_f
end
log_protect(probability) click to toggle source
# File lib/elastic/stats/naive-bayes/token_stats.rb, line 56
# Replaces a probability of exactly 0 or exactly 1 with 0.0001 or 0.9999
# respectively; any other value is returned unchanged.
def log_protect(probability)
  if probability == 0
    0.0001
  elsif probability == 1
    0.9999
  else
    probability
  end
end