class NBayes::Data
Attributes
data[RW]
Public Class Methods
new(options = {})
click to toggle source
# File lib/nbayes.rb, line 47
# Start with an empty data store. +options+ is accepted but not read here.
#
# Layout that @data is expected to take once categories are added:
#   {
#     "category1": {
#       "tokens": Hash.new(0),
#       "total_tokens": 0,
#       "examples": 0
#     },
#     ...
#   }
def initialize(options = {})
  @data = {}
end
Public Instance Methods
add_token_to_category(category, token)
click to toggle source
Add this token to this category
# File lib/nbayes.rb, line 118
# Record one occurrence of +token+ in +category+: bumps the token's own
# frequency and the category's running token total.
def add_token_to_category(category, token)
  stats = cat_data(category)
  stats[:tokens][token] += 1
  stats[:total_tokens] += 1
end
cat_data(category)
click to toggle source
# File lib/nbayes.rb, line 67
# Return the stats hash stored for +category+. When the stored value is not
# a Hash (including when nothing is stored yet), a fresh entry from
# new_category is stored first and then returned.
def cat_data(category)
  data[category] = new_category unless data[category].is_a?(Hash)
  data[category]
end
categories()
click to toggle source
# File lib/nbayes.rb, line 59
# List the category names currently held, i.e. the keys of +data+.
def categories
  names = data.keys
  names
end
category_stats()
click to toggle source
# File lib/nbayes.rb, line 74
# Build a human-readable summary with one line per category: its example
# count, its share of all training examples, and its total token count.
#
# The category name is passed as a format argument (%s) instead of being
# interpolated into the format string, so a name containing "%" cannot be
# misread as a format directive.
#
# Returns the lines joined with "\n" (an empty string when there are no
# categories).
def category_stats
  total_example_count = total_examples
  lines = categories.map do |category|
    e = example_count(category)
    t = token_count(category)
    format("For category %s, %d examples (%.02f%% of the total) and %d total_tokens",
           category, e, 100.0 * e / total_example_count, t)
  end
  lines.join("\n")
end
count_of_token_in_category(category, token)
click to toggle source
How many times does this token appear in this category?
# File lib/nbayes.rb, line 134
# Look up the stored frequency of +token+ within +category+.
# The lookup goes through cat_data, so asking about a category that has no
# entry yet creates one.
def count_of_token_in_category(category, token)
  token_counts = cat_data(category)[:tokens]
  token_counts[token]
end
decrement_examples(category)
click to toggle source
Decrement the number of training examples for this category. Delete the category if the examples counter is 0.
# File lib/nbayes.rb, line 96
# Lower the training-example counter of +category+ by one, and drop the
# whole category once that counter falls below 1.
def decrement_examples(category)
  stats = cat_data(category)
  stats[:examples] -= 1
  return unless stats[:examples] < 1

  delete_category(category)
end
delete_category(category)
click to toggle source
# File lib/nbayes.rb, line 171
# Remove +category+ from the store (nothing happens when it is absent) and
# return the remaining category names.
def delete_category(category)
  # Hash#delete is already a no-op for a missing key, so no presence check.
  data.delete(category)
  categories
end
delete_token_from_category(category, token)
click to toggle source
# File lib/nbayes.rb, line 138
# Forget +token+ entirely within +category+ and subtract its occurrences
# from the category's running token total.
def delete_token_from_category(category, token)
  removed = count_of_token_in_category(category, token)
  stats = cat_data(category)
  stats[:tokens].delete(token)
  # Keep the category total in step with the occurrences just dropped.
  stats[:total_tokens] -= removed
end
each(&block)
click to toggle source
# File lib/nbayes.rb, line 85
# Yield every category name to the given block. Iteration runs over the
# array returned by data.keys, not over the hash itself.
def each(&block)
  category_names = data.keys
  category_names.each(&block)
end
example_count(category)
click to toggle source
# File lib/nbayes.rb, line 101
# Number of training examples recorded for +category+.
def example_count(category)
  stats = cat_data(category)
  stats[:examples]
end
increment_examples(category)
click to toggle source
Increment the number of training examples for this category
# File lib/nbayes.rb, line 90
# Raise the training-example counter of +category+ by one and return the
# new value.
def increment_examples(category)
  stats = cat_data(category)
  stats[:examples] += 1
end
new_category()
click to toggle source
# File lib/nbayes.rb, line 163
# Build the blank statistics record used for a newly seen category.
def new_category
  {
    tokens: Hash.new(0), # per-token frequency counts, defaulting to 0
    total_tokens: 0,
    examples: 0
  }
end
purge_less_than(token, x)
click to toggle source
# File lib/nbayes.rb, line 145
# Remove +token+ from every category when its count across all categories
# is below +x+. Returns true when the token was removed, nil otherwise.
def purge_less_than(token, x)
  unless token_count_across_categories(token) >= x
    each { |category| delete_token_from_category(category, token) }
    true # Let caller know we removed this token
  end
end
remove_token_from_category(category, token)
click to toggle source
Decrement the token counter in a category. If the counter is 0, delete the token. If the total number of tokens is 0, delete the category.
# File lib/nbayes.rb, line 126
# Take back one occurrence of +token+ from +category+. The token entry is
# deleted once its count falls below 1, and the category is deleted once
# its running token total falls below 1.
def remove_token_from_category(category, token)
  stats = cat_data(category)
  stats[:tokens][token] -= 1
  # delete_token_from_category also subtracts the token's remaining count
  # from the total, so it must run before the total is decremented below.
  delete_token_from_category(category, token) if stats[:tokens][token] < 1
  stats[:total_tokens] -= 1
  delete_category(category) if stats[:total_tokens] < 1
end
reset_after_import()
click to toggle source
# File lib/nbayes.rb, line 159
# Set the default value of every category's token-count hash back to 0.
def reset_after_import
  each do |category|
    token_counts = cat_data(category)[:tokens]
    token_counts.default = 0
  end
end
token_count(category)
click to toggle source
# File lib/nbayes.rb, line 105
# Running total of token occurrences recorded for +category+.
def token_count(category)
  stats = cat_data(category)
  stats[:total_tokens]
end
token_count_across_categories(token)
click to toggle source
XXX - TODO - use count_of_token_in_category
Return the total number of times this token has been seen across all categories
# File lib/nbayes.rb, line 155
# Sum the stored count of +token+ over every category.
#
# Goes through categories and count_of_token_in_category instead of mixing
# the +data+ accessor with direct @data access, so all reads of the token
# counts share one code path.
#
# Returns 0 when there are no categories.
def token_count_across_categories(token)
  categories.inject(0) do |sum, category|
    sum + count_of_token_in_category(category, token)
  end
end
token_trained?(token, category)
click to toggle source
# File lib/nbayes.rb, line 63
# True when +category+ has an entry whose token table contains +token+;
# false when the category has no entry. Does not create the category.
def token_trained?(token, category)
  entry = data[category]
  return false unless entry

  entry[:tokens].key?(token)
end
total_examples()
click to toggle source
XXX - Add Enumerable and see if I get inject? Total number of training instances
# File lib/nbayes.rb, line 111
# Total number of training examples across all categories.
#
# Folds over the category names with inject instead of mutating an outer
# accumulator from inside an each block. Returns 0 when there are no
# categories.
def total_examples
  categories.inject(0) { |sum, category| sum + example_count(category) }
end