AD Teaching Wiki
  • Comments
  • Immutable Page
  • Menu
    • Navigation
    • RecentChanges
    • FindPage
    • Local Site Map
    • Help
    • HelpContents
    • HelpOnMoinWikiSyntax
    • Display
    • Attachments
    • Info
    • Raw Text
    • Print View
    • Edit
    • Load
    • Save
  • Login

FrontPage

AD Teaching Wiki:
  • attachment:HierarchicalClustering.cpp of SearchEnginesWS0910

Attachment 'HierarchicalClustering.cpp'

Download

   1 #include <assert.h>
   2 #include <stdio.h>
   3 #include <vector>
   4 
   5 double MAX = 100.0;
   6 
   7 // Our similarity function. Is 1 when the points are equal, 0 when they are MAX
   8 // apart, and between 0 and 1 otherwise.
   9 double sim(double x, double y)
  10 {
  11   return 1 - abs(x - y) / MAX;
  12 }
  13 
  14 int main(int argc, char** argv)
  15 {
  16   int n = 10;
  17 
  18   // Pick random points.
  19   printf("\n");
  20   std::vector<double> X(n);
  21   srand48(99);
  22   for (int i = 0; i < n; ++i) {
  23     X[i] = MAX * drand48(); 
  24     printf("%6.1f", X[i]);
  25   }
  26   printf("\n");
  27 
  28   // Initial clustering.
  29   std::vector<int> C(n);
  30   for (int i = 0; i < n; ++i) { 
  31     C[i] = i;
  32     printf("%6d", C[i]);
  33   }
  34   printf("\n\n");
  35 
  36   // Compute all pairwise similarities.
  37   std::vector<std::vector<double> > sims(n);
  38   for (int i = 0; i < n; ++i) { 
  39     sims[i].resize(n); 
  40     for (int j = 0; j < n; ++j) { 
  41       sims[i][j] = sim(X[i], X[j]);
  42       printf("%6.2f", sims[i][j]);
  43     }
  44     printf("  %6.1f\n", X[i]);
  45   }
  46 
  47   // Now iteratively merge the pair of clusters with the largest similarity.
  48   for (int i = 1; i < n; ++i) {
  49     printf("\nIteration #%d:\n", i);
  50 
  51     // 1. Find the pair of clusters which are most similar.
  52     double maxSim = -1;
  53     std::pair<int, int> maxSimPair = std::make_pair(-1, -1);
  54     for (int j = 0; j < n; ++j) { 
  55       for (int k = 0; k < n; ++k) { 
  56         double sim_jk = sims[j][k];
  57         if (C[j] != C[k] && sim_jk > maxSim) {
  58           maxSim = sim_jk;
  59           maxSimPair = std::make_pair(j, k);
  60         }
  61       }
  62     }
  63     assert(maxSim != -1);
  64     assert(maxSimPair.first != -1);
  65     assert(maxSimPair.second != -1);
  66     printf("Closest pair of points is (X[%d] = %.1f, X[%d] = %.1f)\n",
  67            maxSimPair.first, X[maxSimPair.first],
  68            maxSimPair.second, X[maxSimPair.second]);
  69 
  70     // 2. Merge the clusters. C always contains the smallest index of an element
  71     // in the cluster. For example, if X[3] and X[5] and X[6] are in one
  72     // cluster, then C[3] = C[5] = C[6] = 3.
  73     int c1 = C[maxSimPair.first];
  74     int c2 = C[maxSimPair.second];
  75     int c = c1 < c2 ? c1 : c2;
  76     printf("Merging clusters %d and %d into cluster %d\n",
  77            c1, c2, c);
  78     for (int j = 0; j < n; ++j) { 
  79       if (C[j] == c1 || C[j] == c2) {
  80         C[j] = c;
  81       }
  82       printf("%6d", C[j]);
  83     }
  84     printf("\n");
  85 
  86     // For now, do only one iteration.
  87     // if (i >= 3) break;
  88   }
  89   printf("\n");
  90 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2010-02-04 18:41:13, 2.3 KB) [[attachment:HierarchicalClustering.cpp]]
  • [get | view] (2010-01-12 20:07:24, 2.2 KB) [[attachment:Timer.h]]
  • [get | view] (2010-01-24 15:42:29, 443.7 KB) [[attachment:dblp.txt]]
  • [get | view] (2009-10-22 14:29:03, 44.4 KB) [[attachment:exercise-1.pdf]]
  • [get | view] (2010-01-14 16:06:21, 69.4 KB) [[attachment:exercise-10.pdf]]
  • [get | view] (2010-01-21 14:53:18, 62.0 KB) [[attachment:exercise-11.pdf]]
  • [get | view] (2010-01-28 14:22:59, 61.3 KB) [[attachment:exercise-12.pdf]]
  • [get | view] (2010-02-04 15:31:54, 59.0 KB) [[attachment:exercise-13.pdf]]
  • [get | view] (2010-02-15 17:47:13, 50.9 KB) [[attachment:exercise-14.pdf]]
  • [get | view] (2009-10-29 15:03:08, 43.5 KB) [[attachment:exercise-2.pdf]]
  • [get | view] (2009-11-05 15:24:10, 45.8 KB) [[attachment:exercise-3.pdf]]
  • [get | view] (2009-11-12 15:40:55, 55.2 KB) [[attachment:exercise-4.pdf]]
  • [get | view] (2009-11-20 03:16:57, 53.1 KB) [[attachment:exercise-5.pdf]]
  • [get | view] (2010-03-10 16:24:50, 68.6 KB) [[attachment:exercise-6.pdf]]
  • [get | view] (2009-12-03 15:42:36, 43.2 KB) [[attachment:exercise-7.pdf]]
  • [get | view] (2009-12-22 19:37:35, 57.5 KB) [[attachment:exercise-8.pdf]]
  • [get | view] (2010-01-13 23:57:57, 55.4 KB) [[attachment:exercise-9.pdf]]
  • [get | view] (2009-10-23 16:02:29, 93.8 KB) [[attachment:lecture-1.pdf]]
  • [get | view] (2010-01-17 13:46:23, 162.4 KB) [[attachment:lecture-10.pdf]]
  • [get | view] (2010-01-21 22:21:41, 154.9 KB) [[attachment:lecture-11.pdf]]
  • [get | view] (2010-01-29 16:11:40, 231.0 KB) [[attachment:lecture-12.pdf]]
  • [get | view] (2010-02-04 18:34:40, 147.8 KB) [[attachment:lecture-13.pdf]]
  • [get | view] (2010-02-11 19:36:02, 233.2 KB) [[attachment:lecture-14.pdf]]
  • [get | view] (2009-10-30 00:06:50, 198.7 KB) [[attachment:lecture-2.pdf]]
  • [get | view] (2009-11-05 21:15:21, 106.4 KB) [[attachment:lecture-3.pdf]]
  • [get | view] (2009-11-12 15:53:25, 193.3 KB) [[attachment:lecture-4.pdf]]
  • [get | view] (2009-11-19 03:29:30, 168.2 KB) [[attachment:lecture-5.pdf]]
  • [get | view] (2009-11-27 02:32:33, 151.1 KB) [[attachment:lecture-6.pdf]]
  • [get | view] (2009-12-03 21:22:18, 68.0 KB) [[attachment:lecture-7.pdf]]
  • [get | view] (2009-12-10 21:46:26, 156.3 KB) [[attachment:lecture-8.pdf]]
  • [get | view] (2010-01-07 18:55:37, 127.0 KB) [[attachment:lecture-9.pdf]]
  • [get | view] (2010-02-11 19:42:15, 110.5 KB) [[attachment:lecture-projects.pdf]]
  • [get | view] (2009-12-03 21:20:02, 10.0 KB) [[attachment:prefix-search.tar]]
  • [get | view] (2010-03-10 16:18:17, 68.2 KB) [[attachment:solution-10.pdf]]
  • [get | view] (2010-03-06 18:57:52, 53.0 KB) [[attachment:solution-11.pdf]]
  • [get | view] (2010-03-08 13:55:50, 6.3 KB) [[attachment:solution-12-1.cpp]]
  • [get | view] (2010-03-08 14:16:18, 109.0 KB) [[attachment:solution-12.pdf]]
  • [get | view] (2010-03-10 16:38:55, 44.8 KB) [[attachment:solution-6.pdf]]
  • [get | view] (2010-01-19 13:59:48, 80.4 KB) [[attachment:solution-9.pdf]]
  • [get | view] (2010-03-07 12:59:19, 85.9 KB) [[attachment:solution-midterm.pdf]]
  • [get | view] (2009-11-08 15:22:20, 25.1 KB) [[attachment:trec_queries.txt]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.

  • MoinMoin Powered
  • Python Powered
  • GPL licensed
  • Valid HTML 4.01