class NBayes::Data

Attributes

data[RW]

Public Class Methods

new(options = {}) click to toggle source
# File lib/nbayes.rb, line 47
# Create an empty store. +options+ is accepted but not used by this method.
#
# @data maps each category to a hash shaped like the one new_category builds:
#   {
#     category1 => {
#       :tokens       => Hash.new(0),
#       :total_tokens => 0,
#       :examples     => 0
#     },
#     ...
#   }
def initialize(options = {})
  @data = {}
end

Public Instance Methods

add_token_to_category(category, token) click to toggle source

Add this token to this category

# File lib/nbayes.rb, line 118
# Record one more occurrence of +token+ in +category+: bumps the per-token
# frequency and the category's running token total.
#
# Returns the category's updated total token count.
def add_token_to_category(category, token)
  entry = cat_data(category)
  entry[:tokens][token] += 1
  entry[:total_tokens] += 1
end
cat_data(category) click to toggle source
# File lib/nbayes.rb, line 67
# Fetch the bookkeeping hash for +category+.
#
# If nothing is stored under +category+ yet, or what is stored is not a Hash,
# a fresh entry from new_category is stored there and returned.
def cat_data(category)
  existing = data[category]
  return existing if existing.is_a?(Hash)

  data[category] = new_category
end
categories() click to toggle source
# File lib/nbayes.rb, line 59
# List every category currently present in the store (the keys of +data+).
def categories
  data.keys
end
category_stats() click to toggle source
# File lib/nbayes.rb, line 74
# Build a human-readable summary with one line per category: its example
# count, that count as a percentage of all examples, and its token total.
#
# Returns the lines joined with "\n" (an empty string when there are no
# categories).
def category_stats
  total_example_count = total_examples
  lines = []
  each do |category|
    examples = example_count(category)
    tokens = token_count(category)
    # With no examples recorded at all, report 0% rather than dividing by
    # zero (which would print "NaN").
    share = total_example_count.zero? ? 0.0 : 100.0 * examples / total_example_count
    # The category is passed through %s instead of being interpolated into
    # the format string, so a name containing "%" cannot corrupt the
    # format directives.
    lines << format('For category %s, %d examples (%.02f%% of the total) and %d total_tokens',
                    category, examples, share, tokens)
  end
  lines.join("\n")
end
count_of_token_in_category(category, token) click to toggle source

How many times does this token appear in this category?

# File lib/nbayes.rb, line 134
# Look up how many times +token+ has been recorded in +category+.
# Goes through cat_data, so an unknown category gets an entry created.
def count_of_token_in_category(category, token)
  frequencies = cat_data(category)[:tokens]
  frequencies[token]
end
decrement_examples(category) click to toggle source

Decrement the number of training examples for this category. Delete the category if the examples counter is 0.

# File lib/nbayes.rb, line 96
# Lower the training-example counter for +category+ by one, and drop the
# whole category once that counter falls below 1.
#
# Returns the result of delete_category when the category was dropped,
# otherwise nil.
def decrement_examples(category)
  entry = cat_data(category)
  entry[:examples] -= 1
  delete_category(category) if entry[:examples] < 1
end
delete_category(category) click to toggle source
# File lib/nbayes.rb, line 171
# Remove +category+ and everything stored under it.
#
# Returns the list of categories that remain.
def delete_category(category)
  # Hash#delete is already a no-op for a key that is not present.
  data.delete(category)
  categories
end
delete_token_from_category(category, token) click to toggle source
# File lib/nbayes.rb, line 138
# Forget +token+ entirely within +category+ and subtract its recorded
# frequency from the category's total token count.
#
# Returns the category's updated total token count.
def delete_token_from_category(category, token)
  entry = cat_data(category)
  removed = count_of_token_in_category(category, token)
  entry[:tokens].delete(token)
  # Keep the running total in step with the frequencies that remain.
  entry[:total_tokens] -= removed
end
each(&block) click to toggle source
# File lib/nbayes.rb, line 85
# Iterate over the category names, passing each one to the given block.
# The block is forwarded to Array#each, so the return value is whatever
# Array#each returns for the list of category names.
def each(&block)
  categories.each(&block)
end
example_count(category) click to toggle source
# File lib/nbayes.rb, line 101
# Number of training examples recorded for +category+.
# Goes through cat_data, so an unknown category gets an entry created.
def example_count(category)
  entry = cat_data(category)
  entry[:examples]
end
increment_examples(category) click to toggle source

Increment the number of training examples for this category

# File lib/nbayes.rb, line 90
# Count one more training example for +category+.
#
# Returns the updated example count.
def increment_examples(category)
  entry = cat_data(category)
  entry[:examples] += 1
end
new_category() click to toggle source
# File lib/nbayes.rb, line 163
# Build the blank bookkeeping entry used for a category:
#   :tokens       - per-token frequency counts; an unseen token reads as 0
#   :total_tokens - running total, starts at 0
#   :examples     - training-example counter, starts at 0
# A new hash (with its own :tokens hash) is returned on every call.
def new_category
  { tokens: Hash.new(0), total_tokens: 0, examples: 0 }
end
purge_less_than(token, x) click to toggle source
# File lib/nbayes.rb, line 145
# Drop +token+ from every category when its combined count across all
# categories is below +x+.
#
# Returns true when the token was removed, nil when it was kept.
def purge_less_than(token, x)
  seen_often_enough = token_count_across_categories(token) >= x
  return if seen_often_enough

  each { |category| delete_token_from_category(category, token) }
  true # tells the caller the token was removed
end
remove_token_from_category(category, token) click to toggle source

Decrement the token counter in a category. If the counter drops to 0, delete the token. If the category's total number of tokens drops to 0, delete the category.

# File lib/nbayes.rb, line 126
# Undo one occurrence of +token+ in +category+.
#
# The token's frequency is lowered by one and the token is deleted once the
# frequency drops below 1. The category's total is then lowered by one, and
# the category itself is deleted once that total drops below 1.
#
# Returns the result of delete_category when the category was dropped,
# otherwise nil.
def remove_token_from_category(category, token)
  entry = cat_data(category)
  frequencies = entry[:tokens]

  frequencies[token] -= 1
  # delete_token_from_category subtracts whatever frequency is left (zero or
  # negative here) from the total, so the order of these steps matters.
  delete_token_from_category(category, token) if frequencies[token] < 1

  entry[:total_tokens] -= 1
  delete_category(category) if entry[:total_tokens] < 1
end
reset_after_import() click to toggle source
# File lib/nbayes.rb, line 159
# Set the default value of every category's :tokens hash back to 0, so that
# looking up an unseen token yields 0.
# NOTE(review): presumably needed because imported data loses the Hash
# default - confirm against the import code.
def reset_after_import
  each do |category|
    frequencies = cat_data(category)[:tokens]
    frequencies.default = 0
  end
end
token_count(category) click to toggle source
# File lib/nbayes.rb, line 105
# Total number of tokens recorded for +category+.
# Goes through cat_data, so an unknown category gets an entry created.
def token_count(category)
  entry = cat_data(category)
  entry[:total_tokens]
end
token_count_across_categories(token) click to toggle source

Return the total number of times this token has been seen across all categories. (XXX - TODO: use count_of_token_in_category.)

# File lib/nbayes.rb, line 155
# Add up how often +token+ has been recorded in each category and return the
# combined count (0 when there are no categories).
# TODO: use count_of_token_in_category
def token_count_across_categories(token)
  data.values.inject(0) { |sum, entry| sum + entry[:tokens][token] }
end
token_trained?(token, category) click to toggle source
# File lib/nbayes.rb, line 63
# Has +token+ been recorded under +category+?
#
# Returns false when nothing is stored for +category+; unlike cat_data, this
# never creates the category.
def token_trained?(token, category)
  entry = data[category]
  return false unless entry

  entry[:tokens].key?(token)
end
total_examples() click to toggle source

Total number of training instances. (XXX: add Enumerable and see whether inject becomes available?)

# File lib/nbayes.rb, line 111
# Total number of training examples, summed over every category
# (0 when there are no categories).
def total_examples
  categories.inject(0) { |sum, category| sum + example_count(category) }
end