class Bayes::PaulGraham

Public Instance Methods

estimate(tokens, take=15)
# File lib/bayes.rb, line 224
# Combines the per-token scores of +tokens+ into a single value.
#
# Every distinct token is scored with #score and nil scores are discarded.
# Only the +take+ scores lying farthest from the neutral value 0.5 are kept,
# and they are combined as
#
#   (p1 * ... * pn) / (p1 * ... * pn + (1 - p1) * ... * (1 - pn))
#
# tokens:: collection of tokens; must respond to +uniq+
# take::   maximum number of scores to combine (default 15)
#
# Returns the combined value, or nil when no token yielded a score or when
# the kept scores contain both 1.0 and 0.0.
def estimate(tokens, take=15)
  scores = tokens.uniq.map { |token| score(token) }.compact

  # Rank by absolute distance from 0.5, most extreme first. Both sides of the
  # ordering must use the absolute distance; comparing an absolute value with
  # a signed difference yields an inconsistent order and keeps wrong scores.
  extremes = scores.sort_by { |p| -(0.5 - p).abs }[0...take]

  # A 0.0 zeroes the product and a 1.0 zeroes the complement product, so
  # having both would make the division below 0/0.
  return nil if extremes.empty? || (extremes.include?(1.0) && extremes.include?(0.0))

  prod = extremes.inject(1.0) { |acc, p| acc * p }
  complement = extremes.inject(1.0) { |acc, p| acc * (1 - p) }
  prod / (prod + complement)
end
score(token)
# File lib/bayes.rb, line 216
# Scores a single token from its entries in @spam and @ham.
#
# A token found in neither collection gets the fixed score 0.4. Otherwise
# the score is spam_ratio / (ham_ratio + spam_ratio), limited to the range
# 0.01..0.99, where each ratio is the token's entry divided by the
# collection's +count+ (the ham entry is doubled) and capped at 1.0. A
# collection whose +count+ is zero contributes a ratio of 0.0.
#
# NOTE(review): presumably @spam[token] / @ham[token] are occurrence counts
# and +count+ is the corpus size -- confirm against the class definition.
def score(token)
  known = @spam.include?(token) || @ham.include?(token)
  return 0.4 unless known

  ham_ratio =
    if @ham.count == 0
      0.0
    else
      [1.0, 2 * @ham[token] / @ham.count.to_f].min
    end

  spam_ratio =
    if @spam.count == 0
      0.0
    else
      [1.0, @spam[token] / @spam.count.to_f].min
    end

  # Keep the result away from the absolute certainties 0 and 1.
  [0.01, [0.99, spam_ratio / (ham_ratio + spam_ratio)].min].max
end