53 #ifndef BAYESIAN_INFORMATION_CRITERION_H
54 #define BAYESIAN_INFORMATION_CRITERION_H
84 double sM = pow(s, (
double)M);
88 for (
size_t i=0; i < k; i++) {
93 double root2pi = sqrt(2 * M_PI);
95 for (
size_t i=0; i < p.
size(); i++) {
99 + log(1.0 / (root2pi * sM))
100 - (1 / (2 * s2)) * d * d
104 const size_t pj = (k-1) + M*k + 1;
105 return lD - pj/2 * log((
double)R);
123 template <
typename SizeIterator,
typename DissimIterator>
124 double bic(
size_t k, SizeIterator cluster_sizes, DissimIterator sum2_dissim,
size_t dimensionality) {
126 const double R = std::accumulate(cluster_sizes, cluster_sizes + k, 0);
127 const double M = dimensionality;
128 const double logR = log(R);
129 const double log2pi = log(2 * M_PI);
130 const double pj = (k-1) + M*k + 1;
131 const double s2 = std::accumulate(sum2_dissim, sum2_dissim + k, 0.0) / (R - k);
134 double criterion = 0;
135 for (
size_t i=0; i < k; i++) {
136 const double Rn = *(cluster_sizes + i);
138 - (Rn * log2pi) / 2.0
139 - (Rn * M * log(s2)) / 2.0
144 criterion -= (pj/2.0 * logR);
151 #endif // BAYESIAN_INFORMATION_CRITERION_H
double total_squared_dissimilarity(const partition &p, D dist)
Compute the total squared dissimilarity between all objects and their medoids.
std::vector< object_id > medoid_ids
Gives the index of the object that is the ith medoid.
Class to represent a partitioning of a data set.
size_t size() const
Total number of objects in the partition.
Data types and functions for dealing with dissimilarity matrices.
std::vector< medoid_id > cluster_ids
Gives cluster id (index in medoids) for the ith object.
This represents a partitioning of a data set.
double bic(const partition &p, D distance, size_t M)
Directly computes the BIC from a partition object based on the cluster centroids and the number of clusters.
size_t num_clusters() const
Total number of clusters in the partition.