class JnbClassifier::Classifier

Attributes

result[R]

Public Class Methods

new() click to toggle source
# File lib/jnb_classifier.rb, line 9
# Sets up an empty, untrained classifier.
#
# State kept on the instance:
# * @total_count     - number of documents learned so far
# * @label_count     - label => number of learned documents (missing labels read as 0)
# * @frequency_table - label => per-word counts for that label
# * @word_table      - every word seen so far, stored as hash keys
# * @result          - label => score written by the most recent classification
def initialize
  @total_count = 0
  @label_count = Hash.new(0)
  @frequency_table = {}
  @word_table = {}
  @result = {}
end

Public Instance Methods

classify(attributes) click to toggle source
# File lib/jnb_classifier.rb, line 32
# Scores every learned label for a document and returns the best one, using
# the multivariate Bernoulli naive Bayes model:
#
#   score(label) = log P(label)
#                + sum over vocabulary words w of
#                    log P(w | label)        if w occurs in the document
#                    log(1 - P(w | label))   otherwise
#
# with the Laplace-smoothed estimate
#
#   P(w | label) = (documents of label containing w + 1) / (documents of label + 2)
#
# The "+ 2" reflects the two outcomes of each per-word variable (present or
# absent) and keeps the estimate strictly between 0 and 1.
#
# attributes - the document's words. Anything whose #each yields the word
#              first is accepted (e.g. a Hash of word => frequency, like the
#              one #learn iterates). Only presence matters; frequencies are
#              ignored. nil is treated as an empty document.
#
# Side effect: @result receives the log-score of every label.
#
# Returns a [label, score] pair for the highest-scoring label, or nil when
# nothing has been learned yet.
def classify(attributes)
  # Set of words present in the document being classified.
  present = {}
  Array(attributes).each { |word, _frequency| present[word] = true }

  score = {}
  @frequency_table.each do |label, word_counts|
    label_docs = @label_count[label]
    denominator = label_docs + 2

    # log P(X | label): every vocabulary word contributes, present or not.
    log_likelihood = @word_table.each_key.inject(0.0) do |sum, word|
      # A word cannot occur in more documents than the label has. Capping
      # guards against counts inflated by duplicate entries in a learned
      # document, which would push the probability to 1 or above.
      count = [word_counts.fetch(word, 0), label_docs].min
      p_word = (count + 1).fdiv(denominator)
      sum + Math.log(present.key?(word) ? p_word : 1 - p_word)
    end

    # log P(label)
    log_prior = Math.log(label_docs.fdiv(@total_count))

    score[label] = log_likelihood + log_prior
  end

  # Expose the per-label scores of this classification.
  score.each { |label, value| @result[label] = value }

  score.max_by { |_label, value| value }
end
learn(document) click to toggle source
# File lib/jnb_classifier.rb, line 17
# Adds one labelled document to the training data.
#
# document - an object responding to #label and #attributes; #attributes is
#            iterated with #each and must yield the word first (for example
#            a Hash of word => frequency).
#
# Each word the document yields adds 1 to that word's count for the label,
# regardless of the frequency paired with it (multivariate Bernoulli model),
# and the word is registered in the vocabulary.
#
# Returns the total number of documents learned so far.
def learn(document)
  label = document.label

  # First document for this label: start its word counts at zero.
  @frequency_table[label] = Hash.new(0) unless @frequency_table.key?(label)
  counts_for_label = @frequency_table[label]

  document.attributes.each do |word, _frequency|
    counts_for_label[word] += 1 # presence only: multivariate Bernoulli
    @word_table[word] = 1
  end

  @label_count[label] += 1
  @total_count += 1
end