class DbClustering::DistanceMetrics::CosineSimilarity
Public Class Methods
new(min_dimensions: 1)
click to toggle source
# File lib/distance_metrics/cosine_similarity.rb, line 6
# Creates the metric and remembers the dimension threshold.
#
# @param min_dimensions [Integer] stored in @min_dimensions; #correlation
#   compares the number of compared components against this value
#   (defaults to 1).
def initialize(min_dimensions: 1)
  @min_dimensions = min_dimensions
end
Public Instance Methods
correlation(vector1, vector2)
click to toggle source
# File lib/distance_metrics/cosine_similarity.rb, line 14
# Computes the cosine similarity of two vectors.
#
# Each vector is asked for its comparison array relative to the other one
# (via +array_for_comparison+); the similarity is the dot product of those
# arrays divided by the product of their Euclidean norms.
#
# @param vector1 [#array_for_comparison] first vector
# @param vector2 [#array_for_comparison] second vector
# @return [Float] the cosine similarity; Float::INFINITY when fewer than
#   @min_dimensions components are compared; 0.0 when either comparison
#   array has zero magnitude (the similarity is undefined there and would
#   otherwise come out as NaN)
# @raise [RuntimeError] if the two comparison arrays differ in size
def correlation(vector1, vector2)
  vector1_array = vector1.array_for_comparison(vector2)
  vector2_array = vector2.array_for_comparison(vector1)

  if vector1_array.count != vector2_array.count
    raise "Vectors with different sizes cannot be compared"
  end

  # Sentinel for "not enough components to compare".
  return Float::INFINITY if vector1_array.count < @min_dimensions

  # see here for calculation formula: https://en.wikipedia.org/wiki/Cosine_similarity
  numerator = vector1_array.zip(vector2_array).reduce(0) { |sum, (v1i, v2i)| sum + v1i * v2i }

  # Math.sqrt is called explicitly so the method does not depend on Math
  # being mixed into the receiver.
  left_sqrt = Math.sqrt(vector1_array.reduce(0) { |sum, v1i| sum + v1i ** 2 })
  right_sqrt = Math.sqrt(vector2_array.reduce(0) { |sum, v2i| sum + v2i ** 2 })
  denominator = left_sqrt * right_sqrt

  # A zero-magnitude vector has no direction; avoid 0/0 => NaN.
  return 0.0 if denominator.zero?

  numerator.to_f / denominator
end
distance(vector1, vector2)
click to toggle source
# File lib/distance_metrics/cosine_similarity.rb, line 10
# Converts the cosine similarity of two vectors into a distance.
#
# @param vector1 [Object] first vector, passed through to #correlation
# @param vector2 [Object] second vector, passed through to #correlation
# @return [Float] 1.0 minus the correlation; Float::INFINITY when
#   #correlation returns its Float::INFINITY sentinel
def distance(vector1, vector2)
  similarity = correlation(vector1, vector2)

  # #correlation returns Float::INFINITY when too few components are
  # compared. Subtracting that from 1.0 would give -Infinity, i.e. the
  # smallest possible distance; report the pair as infinitely far apart.
  # NOTE(review): assumes the sentinel means "incomparable" - confirm.
  return Float::INFINITY if similarity == Float::INFINITY

  1.0 - similarity
end