Attachment 'HierarchicalClustering.cpp'
Download 1 #include <assert.h>
2 #include <stdio.h>
3 #include <vector>
4
5 double MAX = 100.0;
6
7 // Our similarity function. Is 1 when the points are equal, 0 when they are MAX
8 // apart, and between 0 and 1 otherwise.
9 double sim(double x, double y)
10 {
11 return 1 - abs(x - y) / MAX;
12 }
13
14 int main(int argc, char** argv)
15 {
16 int n = 10;
17
18 // Pick random points.
19 printf("\n");
20 std::vector<double> X(n);
21 srand48(99);
22 for (int i = 0; i < n; ++i) {
23 X[i] = MAX * drand48();
24 printf("%6.1f", X[i]);
25 }
26 printf("\n");
27
28 // Initial clustering.
29 std::vector<int> C(n);
30 for (int i = 0; i < n; ++i) {
31 C[i] = i;
32 printf("%6d", C[i]);
33 }
34 printf("\n\n");
35
36 // Compute all pairwise similarities.
37 std::vector<std::vector<double> > sims(n);
38 for (int i = 0; i < n; ++i) {
39 sims[i].resize(n);
40 for (int j = 0; j < n; ++j) {
41 sims[i][j] = sim(X[i], X[j]);
42 printf("%6.2f", sims[i][j]);
43 }
44 printf(" %6.1f\n", X[i]);
45 }
46
47 // Now iteratively merge the pair of clusters with the largest similarity.
48 for (int i = 1; i < n; ++i) {
49 printf("\nIteration #%d:\n", i);
50
51 // 1. Find the pair of clusters which are most similar.
52 double maxSim = -1;
53 std::pair<int, int> maxSimPair = std::make_pair(-1, -1);
54 for (int j = 0; j < n; ++j) {
55 for (int k = 0; k < n; ++k) {
56 double sim_jk = sims[j][k];
57 if (C[j] != C[k] && sim_jk > maxSim) {
58 maxSim = sim_jk;
59 maxSimPair = std::make_pair(j, k);
60 }
61 }
62 }
63 assert(maxSim != -1);
64 assert(maxSimPair.first != -1);
65 assert(maxSimPair.second != -1);
66 printf("Closest pair of points is (X[%d] = %.1f, X[%d] = %.1f)\n",
67 maxSimPair.first, X[maxSimPair.first],
68 maxSimPair.second, X[maxSimPair.second]);
69
70 // 2. Merge the clusters. C always contains the smallest index of an element
71 // in the cluster. For example, if X[3] and X[5] and X[6] are in one
72 // cluster, then C[3] = C[5] = C[6] = 3.
73 int c1 = C[maxSimPair.first];
74 int c2 = C[maxSimPair.second];
75 int c = c1 < c2 ? c1 : c2;
76 printf("Merging clusters %d and %d into cluster %d\n",
77 c1, c2, c);
78 for (int j = 0; j < n; ++j) {
79 if (C[j] == c1 || C[j] == c2) {
80 C[j] = c;
81 }
82 printf("%6d", C[j]);
83 }
84 printf("\n");
85
86 // For now, do only one iteration.
87 // if (i >= 3) break;
88 }
89 printf("\n");
90 }
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.