class NaiveBayes::Classifier
Attributes
backend[RW]
default_category[RW]
Public Class Methods
load_yaml(yaml_file)
click to toggle source
Loads a classifier saved in a YAML file into a new memory-backed classifier.
# File lib/nb/classifier.rb, line 117
# Builds a memory-backed classifier from a YAML file.
#
# The file is read as a hash with :categories, :tokens_count and
# :categories_count entries.
#
# yaml_file - path of the YAML file to read.
#
# Returns the newly built classifier.
def load_yaml(yaml_file)
  data = YAML.load_file(yaml_file)

  # initialize takes a splat of categories, so the stored list must be
  # splatted; passing the array itself would make the whole array the one
  # and only category (and the default category).
  new(*data[:categories], backend: :memory).tap do |classifier|
    # NOTE(review): these writers are called on the classifier itself, while
    # the counts are read from the backend elsewhere; confirm the classifier
    # really responds to tokens_count= / categories_count=.
    classifier.tokens_count = data[:tokens_count]
    classifier.categories_count = data[:categories_count]
  end
end
new(*categories)
click to toggle source
# File lib/nb/classifier.rb, line 8
# Creates a classifier for the given categories.
#
# categories - category names. A trailing Hash is treated as options:
#              :backend - :memory (default) or :redis
#              :host    - Redis host, defaults to 'localhost'
#              :port    - Redis port, defaults to 6379
#
# The first category becomes the default category.
#
# Raises RuntimeError when :backend is neither :memory nor :redis.
def initialize(*categories)
  options = categories.last.is_a?(Hash) ? categories.pop : {}

  # Defaults are applied on read rather than written back with ||=, so an
  # options Hash object handed in by the caller is left untouched.
  backend_name = options[:backend] || :memory

  @backend =
    case backend_name
    when :memory
      Backend::Memory.new(categories)
    when :redis
      Backend::Redis.new(categories,
                         host: options[:host] || 'localhost',
                         port: options[:port] || 6379)
    else
      raise "unsupported backend: #{backend_name}"
    end

  @default_category = categories.first
end
Public Instance Methods
assumed_probability()
click to toggle source
If the classifier has only been trained a little, a category may not have seen a given feature yet. Giving that feature a probability of 0 may not be accurate, so we produce an assumed probability instead, which gets smaller the more we train.
# File lib/nb/classifier.rb, line 97
# Fallback probability for a token that a category has not seen yet.
#
# Computed as 0.5 divided by half the total number of trained items, so it
# shrinks as more items are trained.
#
# Returns a Float.
def assumed_probability
  half_of_items = total_number_of_items.to_f / 2
  0.5 / half_of_items
end
classifications(*tokens)
click to toggle source
# File lib/nb/classifier.rb, line 54
# Scores every category of the backend against the given tokens.
#
# A category's score is the probability of the tokens given that category
# multiplied by the probability of the category itself.
#
# tokens - the tokens to score.
#
# Returns an Array of [category, score] pairs, highest score first.
def classifications(*tokens)
  scored = backend.categories.each_with_object({}) do |category, table|
    likelihood = probability_of_tokens_given_a_category(tokens, category)
    table[category] = likelihood * probability_of_a_category(category)
  end

  scored.sort_by { |_category, score| -score }
end
classify(*tokens)
click to toggle source
# File lib/nb/classifier.rb, line 44
# Picks the best-scoring category for the given tokens.
#
# tokens - the tokens to classify.
#
# Returns the top [category, score] pair from #classifications, or
# [default_category, 0.0] when the top score is zero or when nothing was
# scored at all.
def classify(*tokens)
  best = classifications(*tokens).first

  # With no categories there is no top entry; fall back to the default
  # instead of calling #last on nil.
  return [@default_category, 0.0] if best.nil? || best.last == 0.0

  best
end
clear!()
click to toggle source
# File lib/nb/classifier.rb, line 40
# Asks the backend to clear itself.
#
# Returns whatever the backend's clear! returns.
def clear!
  store = backend
  store.clear!
end
data()
click to toggle source
# File lib/nb/classifier.rb, line 101
# Snapshot of the backend's state as a plain Hash.
#
# Returns a Hash with the keys :categories, :tokens_count and
# :categories_count, each holding the backend value of the same name.
def data
  store = backend

  {
    categories: store.categories,
    tokens_count: store.tokens_count,
    categories_count: store.categories_count
  }
end
probability_of_a_category(category)
click to toggle source
# File lib/nb/classifier.rb, line 82
# Share of all trained items that belong to the given category.
#
# category - the category to look up in the backend's categories_count.
#
# Returns a Float.
def probability_of_a_category(category)
  items_in_category = backend.categories_count[category]
  items_in_category.to_f / total_number_of_items
end
probability_of_a_token_given_a_category(token, category)
click to toggle source
# File lib/nb/classifier.rb, line 70
# Probability of seeing a token within a category.
#
# token    - the token to look up.
# category - the category whose counts are used.
#
# Returns the assumed probability when the token's count for the category
# is 0; otherwise the token count divided by the category's item count,
# as a Float.
def probability_of_a_token_given_a_category(token, category)
  occurrences = backend.tokens_count[category][token]
  return assumed_probability if occurrences == 0

  occurrences.to_f / backend.categories_count[category]
end
probability_of_a_token_in_category(token, category)
click to toggle source
# File lib/nb/classifier.rb, line 66
# Probability of a token within one category, normalised by the sum of its
# probabilities over every category of the backend.
#
# token    - the token to look up.
# category - the category of interest.
#
# Returns a Float.
def probability_of_a_token_in_category(token, category)
  in_this_category = probability_of_a_token_given_a_category(token, category)

  # Plain left-to-right accumulation starting from 0.0.
  across_all = 0.0
  backend.categories.each do |candidate|
    across_all += probability_of_a_token_given_a_category(token, candidate)
  end

  in_this_category / across_all
end
probability_of_tokens_given_a_category(tokens, category)
click to toggle source
# File lib/nb/classifier.rb, line 76
# Probability of a whole list of tokens within a category: the product of
# the per-token probabilities, starting from 1.0.
#
# tokens   - the tokens to multiply over.
# category - the category whose probabilities are used.
#
# Returns a Float (1.0 for an empty token list).
def probability_of_tokens_given_a_category(tokens, category)
  tokens
    .map { |token| probability_of_a_token_given_a_category(token, category) }
    .inject(1.0) { |product, likelihood| product * likelihood }
end
save(yaml_file)
click to toggle source
# File lib/nb/classifier.rb, line 109
# Writes the classifier's data to a YAML file.
#
# yaml_file - path of the file to write.
#
# Raises RuntimeError unless the classifier uses the memory backend.
def save(yaml_file)
  # backend holds a backend object, not the :memory symbol, so the check
  # has to be made on its class.
  raise 'only memory backend can save' unless backend.is_a?(Backend::Memory)

  File.write(yaml_file, data.to_yaml)
end
top_tokens_of_category(category, count=20)
click to toggle source
# File lib/nb/classifier.rb, line 62
# Lists the tokens of a category ranked by their normalised probability.
#
# category - the category whose tokens are ranked.
# count    - maximum number of rows to return (default 20).
#
# Returns an Array of [token, occurrences, probability] rows, highest
# probability first.
def top_tokens_of_category(category, count = 20)
  rows = backend.tokens_count[category].map do |token, occurrences|
    [token, occurrences, probability_of_a_token_in_category(token, category)]
  end

  rows.sort_by { |row| -row.last }.first(count)
end
total_number_of_items()
click to toggle source
# Total of all token counts held in @tokens_count, summed over every
# category.
#
# NOTE(review): reads @tokens_count directly, whereas the neighbouring
# methods go through the backend; confirm this variable is still populated.
#
# Returns an Integer (0 when there are no counts).
def total_number_of_tokens
  @tokens_count.values.inject(0) do |sum, counts|
    # Seed the inner fold with 0 so a category without tokens contributes 0
    # rather than nil.
    sum + counts.values.inject(0, :+)
  end
end
# File lib/nb/classifier.rb, line 90
# Total number of trained items: the sum of the backend's per-category
# counts.
#
# Returns the sum, or 0 when there are no counts at all.
def total_number_of_items
  # Seeded with 0 so an empty categories_count yields 0 instead of nil.
  backend.categories_count.values.inject(0, :+)
end
train(category, *tokens)
click to toggle source
# File lib/nb/classifier.rb, line 32
# Passes a training example on to the backend.
#
# category - the category the tokens belong to.
# tokens   - the tokens of the example.
#
# Returns whatever the backend's train returns.
def train(category, *tokens)
  store = backend
  store.train(category, *tokens)
end
untrain(category, *tokens)
click to toggle source
# File lib/nb/classifier.rb, line 36
# Passes an example to be removed on to the backend.
#
# category - the category the tokens belong to.
# tokens   - the tokens of the example.
#
# Returns whatever the backend's untrain returns.
def untrain(category, *tokens)
  store = backend
  store.untrain(category, *tokens)
end