class KMeansCrystal::Model

Public Class Methods

new(cluster_num, entries, **params) click to toggle source
# File lib/kmeans-crystal.rb, line 62
# Sets up a model that partitions +entries+ into +cluster_num+ clusters.
#
# cluster_num - number of clusters to create.
# entries     - collection of records; each one is indexed with the vector
#               name to obtain its feature vector.
# params      - optional settings:
#               :vector_name    - key used to read the vector from an entry
#                                 (defaults to :features).
#               :distance       - 'manhattan' or 'euclidean' (the default);
#                                 a Symbol with the same text is accepted too.
#               :init_centroids - 'random' or 'kmeans++' (the default);
#                                 a Symbol with the same text is accepted too.
#
# Raises RuntimeError when there are fewer entries than clusters, or when
# :distance / :init_centroids holds an unsupported value.
def initialize(cluster_num, entries, **params)
    # The guard rejects "more clusters than entries", so the message names
    # the entries as the thing in short supply.
    raise 'too few entries for cluster_num to evaluate k-means' if entries.size < cluster_num

    @cluster_num = cluster_num
    @entries = entries

    vector_name = params[:vector_name]
    @vector_name = vector_name.nil? ? :features : vector_name

    # Normalise through to_s so that :manhattan and 'manhattan' are equivalent.
    distance = params[:distance]
    @measure = case (distance.nil? ? 'euclidean' : distance.to_s)
    when 'manhattan'
        Measure::Manhattan
    when 'euclidean'
        Measure::Euclidean
    else
        raise 'incorrect value for distance'
    end

    strategy = params[:init_centroids]
    init_centroids = case (strategy.nil? ? 'kmeans++' : strategy.to_s)
    when 'random'
        @entries.sample(@cluster_num).map { |x| x[@vector_name] }
    when 'kmeans++'
        kmeans_pp(@entries, @cluster_num)
    else
        raise 'incorrect value for init_centroids'
    end

    @clusters = new_clusters(init_centroids)
end

Public Instance Methods

predict(entry) click to toggle source
# File lib/kmeans-crystal.rb, line 114
# Returns the name of the cluster that get_min selects for +entry+.
#
# Raises RuntimeError when @clusters is nil.
def predict(entry)
    if @clusters.nil?
        raise 'has not been trained'
    end

    get_min(@clusters, entry).name
end
rename_clusters() { |named_map| ... } click to toggle source
# File lib/kmeans-crystal.rb, line 120
# Lets the caller rename clusters through a yielded Hash.
#
# The Hash initially maps every current cluster name to itself. After the
# block returns, each cluster's name is replaced by whatever the Hash holds
# under its current name.
def rename_clusters
    named_map = @clusters.each_with_object({}) do |cluster, lookup|
        lookup[cluster.name] = cluster.name
    end

    yield(named_map)

    @clusters.each { |cluster| cluster.name = named_map[cluster.name] }
end
result() click to toggle source
# File lib/kmeans-crystal.rb, line 110
# Collects the +output+ of every current cluster, in cluster order.
def result
    @clusters.map(&:output)
end
train() { |i| ... } click to toggle source
# File lib/kmeans-crystal.rb, line 96
# Runs assignment/update rounds indefinitely; the caller's block decides
# when to stop (for example with +break+).
#
# Each round appends every entry to the +entries+ of the cluster chosen by
# get_min, yields the 1-based round number together with every cluster's
# +output+, and then replaces @clusters with new_clusters_from_old.
def train
    1.step do |round|
        @entries.each do |entry|
            get_min(@clusters, entry).entries << entry
        end

        snapshot = @clusters.map { |cluster| cluster.output }
        yield(round, snapshot)

        @clusters = new_clusters_from_old(@clusters)
    end
end

Private Instance Methods

get_min(centroids, entry) click to toggle source
# File lib/kmeans-crystal.rb, line 130
# Returns the element of +centroids+ whose +distance+ to +entry+ is smallest.
#
# A later element replaces the current best unless its distance is strictly
# greater, so among equally distant elements the last one is returned.
def get_min(centroids, entry)
    nearest = centroids[0]
    shortest = nearest.distance(entry)

    centroids.drop(1).each do |candidate|
        span = candidate.distance(entry)
        unless span > shortest
            nearest = candidate
            shortest = span
        end
    end

    nearest
end
kmeans_pp(entries, cluster_num) click to toggle source
# File lib/kmeans-crystal.rb, line 158
# Picks +cluster_num+ starting centroids for +entries+.
#
# The method samples +cluster_num+ entries at random and takes a copy of
# each one's vector (read via @vector_name). It then overwrites the first
# min(cluster_num, 2**dimension) of those copies with corner points of the
# per-dimension [min, max] bounding box of all vectors: centroid +i+ uses
# the binary digits of +i+ (left-padded to +dimension+ digits) to choose
# min (0) or max (1) for each dimension.
#
# entries     - records indexable by @vector_name.
# cluster_num - number of centroids to return.
#
# Returns an Array of centroid vectors that share no objects with +entries+.
def kmeans_pp(entries, cluster_num)
    features = entries.map { |entry| entry[@vector_name] }
    dimension = features.first.size

    # bounds[d] is [min, max] of the d-th component across all vectors.
    bounds = Array.new(dimension) do |d|
        column = features.map { |f| f[d] }
        [column.min, column.max]
    end

    # Copy each sampled vector: the corner assignment below writes into the
    # centroid, and without the copy it would overwrite the caller's data.
    init_centroids = entries.sample(cluster_num).map { |entry| entry[@vector_name].dup }

    corner_count = [2**dimension, init_centroids.size].min
    corner_count.times do |i|
        bits = i.to_s(2).rjust(dimension, "0")
        dimension.times do |d|
            init_centroids[i][d] = bounds[d][bits[d].to_i]
        end
    end

    init_centroids
end
new_clusters(centroids) click to toggle source
# File lib/kmeans-crystal.rb, line 142
# Wraps each centroid in a Cluster named "cluster<index>", passing along the
# model's vector name and distance measure. Returns the clusters as an Array.
def new_clusters(centroids)
    centroids.each_with_index.map do |centroid, index|
        Cluster.new("cluster#{index}", centroid, @vector_name, @measure)
    end
end
new_clusters_from_old(clusters) click to toggle source
# File lib/kmeans-crystal.rb, line 150
# Builds a fresh Cluster for every given cluster, keeping its name and using
# the value of its +update_centroid+ as the new centroid. Returns an Array.
def new_clusters_from_old(clusters)
    clusters.map do |previous|
        Cluster.new(previous.name, previous.update_centroid, @vector_name, @measure)
    end
end