class DbClustering::DistanceMetrics::PearsonCorrelation
Public Class Methods
new(min_dimensions: 1)
click to toggle source
# File lib/distance_metrics/pearson_correlation.rb, line 6
#
# Creates a Pearson-correlation metric.
#
# @param min_dimensions [Integer] minimum number of compared values two
#   vectors must provide; #correlation returns Float::INFINITY instead of
#   a coefficient when fewer values are available (default: 1)
def initialize(min_dimensions: 1)
  @min_dimensions = min_dimensions
end
Public Instance Methods
correlation(vector1, vector2)
click to toggle source
# File lib/distance_metrics/pearson_correlation.rb, line 14
#
# Computes the Pearson product-moment correlation coefficient of two vectors.
#
# Each vector is asked for `array_for_comparison(other)`; the two resulting
# arrays are compared element by element.
# NOTE(review): presumably `array_for_comparison` restricts each vector to the
# dimensions it shares with the other one — confirm against the vector class.
#
# @param vector1 [#array_for_comparison] first vector
# @param vector2 [#array_for_comparison] second vector
# @return [Float] the coefficient (mathematically within -1.0..1.0);
#   Float::INFINITY when fewer than @min_dimensions values are compared;
#   NaN when either array has zero variance (or both are empty), because the
#   denominator is then 0.0
# @raise [RuntimeError] when the two comparison arrays differ in length
def correlation(vector1, vector2)
  xs = vector1.array_for_comparison(vector2)
  ys = vector2.array_for_comparison(vector1)

  if xs.count != ys.count
    raise "Vectors with different sizes cannot be compared"
  end

  # Sentinel for "not enough data to compare", not a real coefficient.
  return Float::INFINITY if xs.count < @min_dimensions

  # see here for calculation formula: http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
  size = xs.count.to_f
  # Seed the fold with 0 so an empty array sums to 0 instead of nil.
  x_mean = xs.reduce(0, :+) / size
  y_mean = ys.reduce(0, :+) / size

  # Deviations from the mean are needed by both numerator and denominator.
  x_devs = xs.map { |x| x - x_mean }
  y_devs = ys.map { |y| y - y_mean }

  numerator = x_devs.zip(y_devs).reduce(0) { |sum, (dx, dy)| sum + dx * dy }

  # Math.sqrt is called explicitly so the method does not depend on Math
  # being mixed into the surrounding class.
  x_norm = Math.sqrt(x_devs.reduce(0) { |sum, dx| sum + dx**2 })
  y_norm = Math.sqrt(y_devs.reduce(0) { |sum, dy| sum + dy**2 })

  numerator.to_f / (x_norm * y_norm)
end
distance(vector1, vector2)
click to toggle source
# File lib/distance_metrics/pearson_correlation.rb, line 10
#
# Converts the correlation of two vectors into a distance: 1.0 minus the
# value returned by #correlation, so a coefficient of 1.0 maps to 0.0 and a
# coefficient of -1.0 maps to 2.0.
#
# NOTE(review): when #correlation returns its Float::INFINITY sentinel (too
# few compared values), this method returns -Infinity, i.e. "closer than
# anything else" — confirm that callers expect this.
#
# @param vector1 [#array_for_comparison] first vector
# @param vector2 [#array_for_comparison] second vector
# @return [Float] 1.0 - correlation(vector1, vector2)
def distance(vector1, vector2)
  1.0 - correlation(vector1, vector2)
end