class NaiveBayes::Classifier

Attributes

backend[RW]
default_category[RW]

Public Class Methods

load_yaml(yaml_file) click to toggle source

Loads the given YAML file into a memory-backed classifier.

# File lib/nb/classifier.rb, line 117
# Builds a memory-backed classifier from a YAML file holding the
# :categories, :tokens_count and :categories_count keys (the same keys
# that #data produces).
#
# yaml_file - path of the YAML file to read.
#
# Returns the new classifier.
def load_yaml(yaml_file)
  # NOTE(review): YAML.load_file deserializes whatever the file contains;
  # only point this at files you trust.
  data = YAML.load_file(yaml_file)

  # The categories must be splatted: the constructor takes *categories, so
  # passing the Array itself would register it as one single category.
  new(*data[:categories], backend: :memory).tap do |classifier|
    # The counts live on the backend (that is where #data reads them from).
    # NOTE(review): assumes the memory backend exposes tokens_count= and
    # categories_count= writers - confirm against Backend::Memory.
    classifier.backend.tokens_count = data[:tokens_count]
    classifier.backend.categories_count = data[:categories_count]
  end
end
new(*categories) click to toggle source
# File lib/nb/classifier.rb, line 8
# Builds a classifier over the given categories.
#
# categories - the category identifiers. A trailing Hash is taken as options:
#              :backend - :memory (default) or :redis
#              :host    - Redis host, 'localhost' when absent (:redis only)
#              :port    - Redis port, 6379 when absent (:redis only)
#
# The first category becomes the default category.
#
# Raises RuntimeError when :backend names an unsupported backend.
def initialize(*categories)
  # The options Hash is only read, never written, so a Hash object the
  # caller keeps around is not modified behind its back.
  options = categories.last.is_a?(Hash) ? categories.pop : {}
  backend_name = options[:backend] || :memory

  @backend =
    case backend_name
    when :memory
      Backend::Memory.new(categories)
    when :redis
      Backend::Redis.new(categories,
                         host: options[:host] || 'localhost',
                         port: options[:port] || 6379)
    else
      raise "unsupported backend: #{backend_name}"
    end

  @default_category = categories.first
end

Public Instance Methods

assumed_probability() click to toggle source

If we have only trained a little, a category may not have seen a given token yet. Giving that token a probability of 0 would likely be wrong, so we use an assumed probability instead, which gets smaller the more we train.

# File lib/nb/classifier.rb, line 97
# Probability used for a token that has not been counted for a category.
# Computed as 0.5 divided by half of total_number_of_items, so it shrinks
# as the number of items grows.
def assumed_probability
  half_of_items = total_number_of_items.to_f / 2
  0.5 / half_of_items
end
classifications(*tokens) click to toggle source
# File lib/nb/classifier.rb, line 54
# Scores every backend category for the given tokens.
#
# The score of a category is the probability of the tokens given that
# category multiplied by the probability of the category itself.
#
# Returns an Array of [category, score] pairs, highest score first.
def classifications(*tokens)
  scored = backend.categories.each_with_object({}) do |category, table|
    likelihood = probability_of_tokens_given_a_category(tokens, category)
    table[category] = likelihood * probability_of_a_category(category)
  end

  scored.sort_by { |_category, score| -score }
end
classify(*tokens) click to toggle source
# File lib/nb/classifier.rb, line 44
# Picks the best classification for the given tokens.
#
# Returns the first [category, score] pair from #classifications, or
# [@default_category, 0.0] when that pair's score equals 0.0.
def classify(*tokens)
  best = classifications(*tokens).first
  return best unless best.last == 0.0

  [@default_category, 0.0]
end
clear!() click to toggle source
# File lib/nb/classifier.rb, line 40
# Forwards to the backend's clear!; what exactly is cleared is decided by
# the backend. Returns whatever the backend returns.
def clear!
  backend.clear!
end
data() click to toggle source
# File lib/nb/classifier.rb, line 101
# Snapshot of the backend state as a Hash with the keys :categories,
# :tokens_count and :categories_count, each read from the backend.
def data
  {
    categories: backend.categories,
    tokens_count: backend.tokens_count,
    categories_count: backend.categories_count
  }
end
probability_of_a_category(category) click to toggle source
# File lib/nb/classifier.rb, line 82
# Share of all items that belong to the given category: the backend's
# count for the category divided by total_number_of_items.
def probability_of_a_category(category)
  items_in_category = backend.categories_count[category].to_f
  items_in_category / total_number_of_items
end
probability_of_a_token_given_a_category(token, category) click to toggle source
# File lib/nb/classifier.rb, line 70
# Probability of seeing the token in an item of the given category: the
# token's count for the category divided by the category's item count.
#
# A token with no count for the category (a count of 0, or no entry at
# all) gets assumed_probability instead of 0.
def probability_of_a_token_given_a_category(token, category)
  # A plain Hash (for example one loaded from YAML) answers nil for an
  # unseen token; treat that the same as an explicit count of 0.
  occurrences = backend.tokens_count[category][token] || 0
  return assumed_probability if occurrences == 0

  occurrences.to_f / backend.categories_count[category]
end
probability_of_a_token_in_category(token, category) click to toggle source
# File lib/nb/classifier.rb, line 66
# The token's probability given this category, divided by the sum of its
# probabilities given each of the backend's categories.
def probability_of_a_token_in_category(token, category)
  share = probability_of_a_token_given_a_category(token, category)
  total = backend.categories.reduce(0.0) do |sum, other|
    sum + probability_of_a_token_given_a_category(token, other)
  end

  share / total
end
probability_of_tokens_given_a_category(tokens, category) click to toggle source
# File lib/nb/classifier.rb, line 76
# Product of the per-token probabilities for the given category, starting
# from 1.0 (so an empty token list yields 1.0).
def probability_of_tokens_given_a_category(tokens, category)
  likelihoods = tokens.map do |token|
    probability_of_a_token_given_a_category(token, category)
  end

  likelihoods.inject(1.0) { |product, likelihood| product * likelihood }
end
save(yaml_file) click to toggle source
# File lib/nb/classifier.rb, line 109
# Writes the classifier's #data to yaml_file as YAML.
#
# yaml_file - path of the file to write.
#
# Raises RuntimeError ('only memory backend can save') unless the backend
# is a Backend::Memory.
def save(yaml_file)
  # backend holds the backend object built by the constructor, never the
  # :memory Symbol, so the check has to be on the object's class.
  raise 'only memory backend can save' unless backend.is_a?(Backend::Memory)

  File.write(yaml_file, data.to_yaml)
end
top_tokens_of_category(category, count=20) click to toggle source
# File lib/nb/classifier.rb, line 62
# Lists the tokens counted for a category, ranked by
# probability_of_a_token_in_category (highest first).
#
# category - the category whose tokens are ranked.
# count    - maximum number of entries to return (default 20).
#
# Returns an Array of [token, token_count, probability] triples.
def top_tokens_of_category(category, count = 20)
  scored = backend.tokens_count[category].map do |token, occurrences|
    [token, occurrences, probability_of_a_token_in_category(token, category)]
  end

  scored.sort_by { |entry| -entry.last }.first(count)
end
total_number_of_items() click to toggle source

def total_number_of_tokens

@tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }

end

# File lib/nb/classifier.rb, line 90
# Sum of the backend's per-category counts.
#
# Returns 0 when there are no counts at all (rather than nil).
def total_number_of_items
  # The explicit 0 seed gives a numeric result for an empty collection.
  backend.categories_count.values.inject(0) { |total, count| total + count }
end
train(category, *tokens) click to toggle source
# File lib/nb/classifier.rb, line 32
# Forwards the category and tokens unchanged to the backend's train.
# Returns whatever the backend returns.
def train(category, *tokens)
  backend.train(category, *tokens)
end
untrain(category, *tokens) click to toggle source
# File lib/nb/classifier.rb, line 36
# Forwards the category and tokens unchanged to the backend's untrain.
# Returns whatever the backend returns.
def untrain(category, *tokens)
  backend.untrain(category, *tokens)
end