class Magnifier

Constants

LEARNING_STEPS

TODO: make this configurable, or check for convergence instead of using a fixed number of steps.

Attributes

f1_score[R]
features_count[R]
mu_vector[R]
sigma_squared_vector[R]
threshold[RW]
training_set[R]
training_set_size[R]

Public Class Methods

new(examples, threshold = 0.01) click to toggle source

examples is expected to be a 2-D array of real values

# File lib/magnifier/magnifier.rb, line 14
# Builds a detector from the training examples.
#
# examples  - rows of numeric feature values; they are wrapped into a
#             Numo::DFloat array and kept as the training set.
# threshold - initial anomaly threshold (defaults to 0.01).
#
# The mean and variance vectors start as zeros and the F1 score as 0.
def initialize(examples, threshold = 0.01)
  @threshold = threshold
  @f1_score = 0

  @training_set = Numo::DFloat[*examples]
  rows, columns = training_set.shape
  @training_set_size = rows
  # When the shape has no second dimension, fall back to a single feature.
  @features_count = columns || 1

  @mu_vector = Numo::DFloat.zeros(@features_count)
  @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
end

Public Instance Methods

anomaly?(example) click to toggle source
# File lib/magnifier/magnifier.rb, line 63
# Tells whether the example is an anomaly, i.e. whether its probability
# (as computed by #probability) is strictly below the current threshold.
def anomaly?(example)
  likelihood = probability(example)
  likelihood < threshold
end
export(path_or_file) click to toggle source
# File lib/magnifier/magnifier.rb, line 71
# Hands this instance and the given path or file over to
# Magnifier::Exporter.export and returns whatever that call returns.
# NOTE(review): what gets written, and in which format, is decided by
# Magnifier::Exporter, which is defined elsewhere.
def export(path_or_file)
  Magnifier::Exporter.export(path_or_file, self)
end
import(path_or_file) click to toggle source
# File lib/magnifier/magnifier.rb, line 67
# Hands this instance and the given path or file over to Magnifier::Importer
# and returns whatever that call returns.
# NOTE(review): this invokes Importer.export rather than Importer.import,
# which looks like a copy-paste from #export. Confirm which method
# Magnifier::Importer actually defines.
def import(path_or_file)
  Magnifier::Importer.export(path_or_file, self)
end
optimize_threshold(examples, base_truths) click to toggle source

Optimizes the threshold using the F1 score. Requires a cross-validation set (which should differ from the training set!). TODO: convert base truths to booleans.

# File lib/magnifier/magnifier.rb, line 32
# Searches for the anomaly threshold that maximizes the F1 score on a
# labelled set of examples, stores the best threshold and its score on the
# instance and returns them.
#
# examples    - collection of examples, each one accepted by #probability.
# base_truths - labels aligned with examples; 1 or true marks an anomaly,
#               any other value a regular example.
#
# Returns [threshold, f1_score]. Both are 0 when no candidate threshold
# reaches a positive F1 score.
# Raises ArgumentError when examples is empty.
def optimize_threshold(examples, base_truths)
  expected = base_truths.map { |value| value == 1 || value == true }
  probabilities = examples.map { |example| probability(example) }
  raise ArgumentError, 'examples must not be empty' if probabilities.empty?

  lowest, highest = probabilities.minmax
  step = (highest - lowest) / LEARNING_STEPS

  # Range#step rejects a zero step, which happens when every example has the
  # same probability; evaluate the single available candidate instead.
  candidates = step > 0 ? (lowest..highest).step(step) : [lowest]

  # Start every run from scratch: a score left over from a previous run
  # would otherwise prevent any new threshold from being selected.
  @threshold = 0
  @f1_score = 0

  candidates.each do |candidate|
    predictions = probabilities.map { |value| value < candidate }
    score = compute_f1_score(predictions, expected)
    next unless score > @f1_score

    @f1_score = score
    @threshold = candidate
  end

  [threshold, f1_score]
end
probability(example) click to toggle source
# File lib/magnifier/magnifier.rb, line 52
# Computes the probability density of an example as the product of the
# per-feature Gaussian densities, using mu_vector[i] as the mean and
# sigma_squared_vector[i] as the variance of feature i.
#
# example - collection of numeric feature values, iterated in order.
#
# Returns the product of the densities, or 1 for an example with no
# features. A zero variance makes the divisions below produce NaN or
# Infinity rather than raise.
def probability(example)
  joint_density = 1

  example.each_with_index do |feature, index|
    mean = mu_vector[index]
    variance = sigma_squared_vector[index]

    exponent = -((feature - mean)**2 / (2 * variance))
    normalizer = (2 * Math::PI * variance)**0.5

    joint_density *= Math.exp(exponent) / normalizer
  end

  joint_density
end
train() click to toggle source
# File lib/magnifier/magnifier.rb, line 24
# Fits the model to the training set: stores the mean of the training set
# along axis 0 in @mu_vector and the mean squared deviation from it
# (sum along axis 0 divided by the training set size) in
# @sigma_squared_vector, converted to a plain array.
#
# Returns the stored variance array.
def train
  @mu_vector = @training_set.mean(0)

  squared_deviations = (training_set - mu_vector)**2
  variance = squared_deviations.sum(0) / training_set_size

  @sigma_squared_vector = variance.to_a
end

Private Instance Methods

compute_f1_score(predictions, base_truths) click to toggle source
# File lib/magnifier/magnifier.rb, line 77
# Computes the F1 score (harmonic mean of precision and recall) of the
# predictions against the expected labels.
#
# predictions - booleans, true meaning "predicted anomaly".
# base_truths - booleans aligned with predictions, true meaning "actual
#               anomaly"; a missing entry counts as false.
#
# Returns a Float, or 0 when there are no true positives.
def compute_f1_score(predictions, base_truths)
  true_positives = 0
  false_positives = 0
  false_negatives = 0

  # Classify every prediction in a single pass.
  predictions.each_with_index do |predicted, index|
    actual = base_truths[index]

    if predicted && actual
      true_positives += 1
    elsif predicted
      false_positives += 1
    elsif actual
      false_negatives += 1
    end
  end

  # Without true positives both precision and recall are zero (or undefined).
  return 0 if true_positives.zero?

  precision = true_positives.to_f / (true_positives + false_positives)
  recall = true_positives.to_f / (true_positives + false_negatives)

  (2 * precision * recall) / (precision + recall)
end