Muster
 All Classes Namespaces Files Functions Variables Typedefs Macros
partition.cpp
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2010, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory
4 // LLNL-CODE-433662
5 // All rights reserved.
6 //
7 // This file is part of Muster. For details, see http://github.com/tgamblin/muster.
8 // Please also read the LICENSE file for further information.
9 //
10 // Redistribution and use in source and binary forms, with or without modification, are
11 // permitted provided that the following conditions are met:
12 //
13 // * Redistributions of source code must retain the above copyright notice, this list of
14 // conditions and the disclaimer below.
15 // * Redistributions in binary form must reproduce the above copyright notice, this list of
16 // conditions and the disclaimer (as noted below) in the documentation and/or other materials
17 // provided with the distribution.
18 // * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
19 // or promote products derived from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
22 // OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
24 // LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
25 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //////////////////////////////////////////////////////////////////////////////////////////////////
31 
32 ///
33 /// @file partition.cpp
34 /// @author Todd Gamblin tgamblin@llnl.gov
35 ///
36 #include "partition.h"
37 
38 #include <iostream>
39 #include <iterator>
40 #include <map>
41 #include <cassert>
42 #include <algorithm>
43 
44 #include "counter.h"
45 #include "stl_utils.h"
46 
47 using namespace std;
48 
49 namespace cluster {
50 
51  partition::partition(size_t num_objects) : cluster_ids(num_objects, 0) {
52  if (num_objects) medoid_ids.resize(1);
53  }
54 
55 
57 
58 
59  void partition::to_cluster_list(cluster_list& clusters) const {
60  clusters.clear();
61  clusters.resize(medoid_ids.size());
62  for (size_t object=0; object < cluster_ids.size(); object++) {
63  clusters[cluster_ids[object]].insert(object);
64  }
65  }
66 
67 
68  void partition::swap(partition& other) {
69  medoid_ids.swap(other.medoid_ids);
70  cluster_ids.swap(other.cluster_ids);
71  }
72 
73 
74  void partition::sort() {
75  // first create a mapping from new ids to old ids.
76  vector<size_t> mapping(medoid_ids.size());
77  generate(mapping.begin(), mapping.end(), sequence());
78 
79  std::sort(mapping.begin(), mapping.end(), indexed_lt(medoid_ids));
80  invert(mapping);
81  std::sort(medoid_ids.begin(), medoid_ids.end());
82 
83  // translate old cluster ids to new ones.
84  for (size_t i=0; i < cluster_ids.size(); i++) {
85  cluster_ids[i] = mapping[cluster_ids[i]];
86  }
87  }
88 
89 
90  static void write(ostream& out, const cluster_list& clusters, const vector<object_id> *medoid_ids = NULL) {
91  if (medoid_ids) {
92  out << "Medoids: ";
93  copy(medoid_ids->begin(), medoid_ids->end(), ostream_iterator<object_id>(out, " "));
94  out << endl;
95  }
96 
97  for (unsigned i=0; i < clusters.size(); i++) {
98  out << i << "\t";
99  const set<object_id>& c = clusters[i];
100 
101  for (set<object_id>::const_iterator obj=c.begin(); obj != c.end(); obj++) {
102  if (medoid_ids && (*medoid_ids)[i] == *obj) {
103  out << "[" << *obj << "] ";
104  } else {
105  out << *obj << " ";
106  }
107  }
108  out << endl;
109  }
110  }
111 
112 
113  ostream& operator<<(ostream& out, const cluster_list& clusters) {
114  write(out, clusters);
115  return out;
116  }
117 
118 
119  size_t partition::size(size_t i) const {
120  return count(cluster_ids.begin(), cluster_ids.end(), i);
121  }
122 
123 
124 
125  double mirkin_distance(const cluster_list& c1, const cluster_list& c2) {
126  size_t c1_sum2 = 0;
127  size_t n = 0;
128  for (size_t i=0; i < c1.size(); i++) {
129  c1_sum2 += c1[i].size() * c1[i].size();
130  n += c1[i].size();
131  }
132 
133  size_t c2_sum2 = 0;
134  for (size_t i=0; i < c2.size(); i++) {
135  c2_sum2 += c2[i].size() * c2[i].size();
136  }
137 
138 
139  size_t c1c2_sum2 = 0;
140  for (size_t i=0; i < c1.size(); i++) {
141  for (size_t j=0; j < c2.size(); j++) {
142  size_t size;
143  set_intersection(c1[i].begin(), c1[i].end(),
144  c2[j].begin(), c2[j].end(),
145  counter(size));
146  c1c2_sum2 += size * size;
147  }
148  }
149 
150  return (c1_sum2 + c2_sum2 - (2 * c1c2_sum2)) / (double)(n*n);
151  }
152 
153 
154  std::ostream& operator<<(std::ostream& out, const partition& p) {
155  cluster_list list;
156  p.to_cluster_list(list);
157  write(out, list, &p.medoid_ids);
158  return out;
159  }
160 
161 
162  double mirkin_distance(const partition& c1, const partition& c2) {
163  cluster_list l1, l2;
164  c1.to_cluster_list(l1);
165  c2.to_cluster_list(l2);
166  return mirkin_distance(l1, l2);
167  }
168 
169 
170  void expand(cluster_list& list, size_t level) {
171  if (!level) return;
172 
173  cluster_list expanded(list.size());
174  for (size_t i=0; i < list.size(); i++) {
175  for (set<object_id>::iterator o=list[i].begin(); o != list[i].end(); o++) {
176  size_t start = (1 << level) * (*o);
177  size_t end = (1 << level) * (*o + 1);
178  for (size_t ex=start; ex < end; ex++) {
179  expanded[i].insert(ex);
180  }
181  }
182  }
183  expanded.swap(list);
184  }
185 
186 
188  object_id o=0;
189  bool first = true;
190  while (o < cluster_ids.size()) {
191  if (cluster_ids[o] == m) {
192  object_id start = o++;
193  while (o < cluster_ids.size() && cluster_ids[o] == m) o++;
194  if (!first) out << " ";
195  if (o == start+1) {
196  out << start;
197  } else {
198  out << start << "-" << (o-1);
199  }
200  first = false;
201  }
202  o++;
203  }
204  }
205 
206 } // namespace cluster
Generator object for a strided sequence of ints.
Definition: stl_utils.h:60
std::vector< object_id > medoid_ids
Gives the index of the object that is the ith medoid.
Definition: partition.h:79
Class to represent a partitioning of a data set.
std::vector< std::set< object_id > > cluster_list
Explicit representation of a clustering.
Definition: partition.h:60
void swap(partition &other)
Fast swap with other patrition objects.
Definition: partition.cpp:68
size_t size() const
Total number of objects in the partition.
Definition: partition.h:109
void sort()
puts medoids in order by their object id, and adjusts cluster_ids accordingly.
Definition: partition.cpp:74
std::ostream & operator<<(std::ostream &out, const id_pair< T > &p)
Print out an id_pair as a tuple of its element and its source rank.
Definition: id_pair.h:103
virtual ~partition()
Virtual destructor; currently does nothing.
Definition: partition.cpp:56
size_t object_id
More descriptive type for object index.
Definition: partition.h:52
size_t medoid_id
More descriptive type for medoid index.
Definition: partition.h:51
void expand(cluster_list &list, size_t level)
Expand a cluster_list by l levels.
Definition: partition.cpp:170
Dummy output iterator that counts how many times it was assigned to. without actually storing anythin...
indexed_lt_functor< Indexable > indexed_lt(const Indexable &container)
Definition: stl_utils.h:42
counter_iterator< T > counter(T &ref)
Adaptor for creating type-inferred counters.
Definition: counter.h:105
void write_members_with_runs(medoid_id m, std::ostream &out)
Write the members of cluster m out to the output stream formatted nicely with hyphenated runs of cons...
Definition: partition.cpp:187
void to_cluster_list(cluster_list &list) const
Creates a list of std::sets from the partition info in medoids and cluster_ids.
Definition: partition.cpp:59
void invert(std::vector< Index > &vec)
Definition: stl_utils.h:48
std::vector< medoid_id > cluster_ids
Gives cluster id (index in medoids) for the ith object.
Definition: partition.h:84
double mirkin_distance(const cluster_list &c1, const cluster_list &c2)
Mirkin distance bt/w two clusterings.
Definition: partition.cpp:125
This represents a partitioning of a data set.
Definition: partition.h:76
Muster. Copyright © 2010, Lawrence Livermore National Laboratory, LLNL-CODE-433662.
Distribution of Muster and its documentation is subject to terms of the Muster LICENSE.
Generated on Thu Sep 1 2016 using Doxygen 1.8.5