class Kmeans
Public Class Methods
argmin(arr)
click to toggle source
# File lib/kmeans.rb, line 78
# Returns the index of the smallest element of +arr+.
#
# Elements are compared with >=, so when several elements tie for the
# minimum the index of the LAST of them is returned. An empty or
# single-element array yields 0 (no comparison is performed).
def self.argmin(arr)
  (1...arr.length).reduce(0) do |best, candidate|
    arr[best] >= arr[candidate] ? candidate : best
  end
end
euclidean_distance(p1, p2)
click to toggle source
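Computes the cosine distance (1 − cosine similarity) between p1 and p2; despite the method name, this is not the Euclidean distance.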
# File lib/kmeans.rb, line 51
# Returns the cosine distance between +p1+ and +p2+, i.e.
# 1 - (p1 . p2) / (|p1| * |p2|). Despite its name this is NOT the Euclidean
# distance; the name is kept so existing callers continue to work.
#
# Every coordinate is coerced with to_f. The dot product walks +p1+, so a
# coordinate missing from a shorter +p2+ counts as 0.0.
#
# When either vector has zero magnitude (all zeros, or empty) the cosine is
# undefined; 1.0 is returned instead of NaN, because NaN breaks every
# ordering comparison made on the result.
def self.euclidean_distance(p1, p2)
  dot = 0.0
  norm_sq_1 = 0.0
  p1.each_with_index do |value, i|
    coord = value.to_f
    norm_sq_1 += coord**2
    dot += coord * p2[i].to_f
  end
  norm_sq_2 = p2.inject(0.0) { |acc, value| acc + value.to_f**2 }

  denominator = Math.sqrt(norm_sq_1) * Math.sqrt(norm_sq_2)
  # Guard against 0/0: treat a zero-length vector as having similarity 0.
  return 1.0 if denominator.zero?

  1 - dot / denominator
end
getCenter(ps)
click to toggle source
# File lib/kmeans.rb, line 65
# Returns the component-wise arithmetic mean of the points in +ps+ as an
# array of floats.
#
# Coordinates are coerced with to_f and summed per axis; each sum is then
# divided by the number of points. A point shorter than the others adds
# nothing to the axes it lacks but still counts in the divisor. An empty
# +ps+ yields [].
def self.getCenter(ps)
  totals = ps.each_with_object([]) do |point, sums|
    point.each_with_index do |coord, axis|
      sums[axis] = sums[axis].to_f + coord.to_f
    end
  end

  totals.map { |total| total / ps.count }
end
group_data(data, centers)
click to toggle source
# File lib/kmeans.rb, line 33
# Partitions +data+ into one bucket per entry of +centers+.
#
# For every item the distance to each center is computed with
# euclidean_distance, and the item is appended to the bucket whose index
# argmin selects from those distances.
#
# Returns an array of buckets in the same order as +centers+; a center
# that attracts no items keeps an empty bucket.
def self.group_data(data, centers)
  buckets = Array.new(centers.length) { [] }

  data.each do |item|
    distances = centers.map { |center| euclidean_distance(item, center) }
    buckets[argmin(distances)].push(item)
  end

  buckets
end
hi()
click to toggle source
# File lib/kmeans.rb, line 4
# Writes a one-line greeting to standard output and returns nil.
def self.hi
  $stdout.puts('K-means clustering ruby!')
end
run(k, data)
click to toggle source
# File lib/kmeans.rb, line 8
# Clusters +data+ around +k+ centers and returns [centers, groups].
#
# k    - number of centers to draw; the initial centers are a random
#        sample of +data+ (Array#sample), so results vary between calls.
# data - array of points (each point an array of coordinates).
#
# Each pass regroups the data with group_data and stops as soon as the
# grouping equals that of the previous pass. Otherwise every center is
# replaced by getCenter of its group.
#
# A group that ends up empty keeps its previous center. Recomputing it
# would produce an empty center ([]), whose distance to any point is
# undefined and distorts the next grouping pass.
def self.run(k, data)
  # Random first centers.
  centers = data.sample(k)
  groups = Array.new(k) { [] }

  loop do
    # Remember the grouping from the previous pass.
    previous_groups = groups
    groups = group_data(data, centers)

    # Converged: the assignment did not change.
    break if groups == previous_groups

    # Update centers, leaving the center of an empty group untouched.
    centers = groups.each_with_index.map do |members, index|
      members.empty? ? centers[index] : getCenter(members)
    end
  end

  [centers, groups]
end