class Lurn::NaiveBayes::MultinomialNaiveBayes
Attributes
prior_probabilities[RW]
probability_matrix[RW]
unique_labels[RW]
Public Class Methods
new()
click to toggle source
# File lib/lurn/naive_bayes/multinomial_naive_bayes.rb, line 7
# Creates a classifier with no state set; takes no arguments.
def initialize; end
Public Instance Methods
fit(vectors, labels)
click to toggle source
# File lib/lurn/naive_bayes/multinomial_naive_bayes.rb, line 11
# Fits the classifier: records the distinct labels and the feature count,
# builds the per-label feature log-probability table, and computes the prior
# probability of each distinct label.
#
# @param vectors [Array<Array<Numeric>>] one row of feature values per sample
# @param labels [Array] one label per row of +vectors+
# @return [Array<Float>] the prior probabilities, ordered like +unique_labels+
# @raise [ArgumentError] if the number of rows and the number of labels differ
def fit(vectors, labels)
  vectors = Matrix.rows(vectors)

  # Fail early with a clear message: a short label list would otherwise
  # surface later as a nil-index error, and a long one would skew the priors.
  unless vectors.row_count == labels.count
    raise ArgumentError,
          "expected one label per vector (got #{vectors.row_count} vectors and #{labels.count} labels)"
  end

  @unique_labels = labels.uniq
  @feature_count = vectors.column_size

  count_matrix = build_count_matrix(vectors, labels)
  @probability_matrix = build_probability_matrix(count_matrix, labels)

  sample_count = labels.count.to_f
  @prior_probabilities = @unique_labels.map do |unique_label|
    labels.count { |label| unique_label == label }.to_f / sample_count
  end
end
Private Instance Methods
build_count_matrix(vectors, labels)
click to toggle source
# File lib/lurn/naive_bayes/multinomial_naive_bayes.rb, line 40
# Sums the feature values of all samples that share a label.
#
# Reads +@unique_labels+ and +@feature_count+, which +fit+ sets before
# calling this method.
#
# @param vectors [Matrix] one row of feature values per sample
# @param labels [Array] the label of each row of +vectors+
# @return [Matrix] one row per entry of +@unique_labels+ and one column per
#   feature, holding the summed values
def build_count_matrix(vectors, labels)
  counts = Array.new(@unique_labels.count) { Array.new(@feature_count, 0) }

  vectors.row_vectors.each_with_index do |vector, row|
    # Resolve the label's row once per sample instead of once per cell.
    label_counts = counts[@unique_labels.index(labels[row])]
    vector.each_with_index { |value, col| label_counts[col] += value }
  end

  Matrix.rows(counts)
end
build_probability_matrix(count_matrix, labels)
click to toggle source
# File lib/lurn/naive_bayes/multinomial_naive_bayes.rb, line 25
# Converts per-label feature counts into log-probabilities.
#
# Each cell becomes log((count + 1) / (row total + @feature_count)), so a
# feature with a zero count still gets a finite log-probability.
#
# @param count_matrix [Matrix] summed feature values, one row per label
# @param labels [Array] not used by the computation; kept so the signature
#   stays unchanged
# @return [Array<Array<Float>>] log-probabilities with the same layout as
#   +count_matrix+
def build_probability_matrix(count_matrix, labels)
  count_matrix.row_vectors.map do |counts|
    # The denominator is the same for every cell of a row, so compute it once.
    denominator = counts.inject(0, :+) + @feature_count
    counts.to_a.map { |count| Math.log((count.to_f + 1.0) / denominator) }
  end
end
joint_log_likelihood(vector)
click to toggle source
# File lib/lurn/naive_bayes/multinomial_naive_bayes.rb, line 52 def joint_log_likelihood(vector) jlls = [] @unique_labels.each_with_index do |label, label_index| probabilities = @probability_matrix[label_index] jll = vector.dot(probabilities) jll += Math.log(@prior_probabilities[label_index]) jlls.push(jll) end jlls end