@@ -39,27 +39,23 @@ import org.apache.spark.{SparkConf, SparkContext}
3939 * n: Number of sampled points on innermost circle. There are proportionally more points
4040 * within the outer/larger circles
4141 * maxIterations: Number of Power Iterations
42- * outerRadius: radius of the outermost of the concentric circles
4342 * }}}
4443 *
4544 * Here is a sample run and output:
4645 *
47- * ./bin/run-example mllib.PowerIterationClusteringExample -k 3 --n 30 --maxIterations 15
48- *
49- * Cluster assignments: 1 -> [0,1,2,3,4],2 -> [5,6,7,8,9,10,11,12,13,14],
50- * 0 -> [15,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
46+ * ./bin/run-example mllib.PowerIterationClusteringExample -k 2 --n 10 --maxIterations 15
5147 *
48+ * Cluster assignments: 1 -> [0,1,2,3,4,5,6,7,8,9],
49+ * 0 -> [10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
5250 *
5351 * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
5452 */
5553object PowerIterationClusteringExample {
5654
5755 case class Params (
58- input : String = null ,
59- k : Int = 3 ,
60- numPoints : Int = 5 ,
61- maxIterations : Int = 10 ,
62- outerRadius : Double = 3.0
56+ k : Int = 2 ,
57+ numPoints : Int = 10 ,
58+ maxIterations : Int = 15
6359 ) extends AbstractParams [Params ]
6460
6561 def main (args : Array [String ]) {
@@ -68,17 +64,14 @@ object PowerIterationClusteringExample {
6864 val parser = new OptionParser [Params ](" PowerIterationClusteringExample" ) {
6965 head(" PowerIterationClusteringExample: an example PIC app using concentric circles." )
7066 opt[Int ]('k' , " k" )
71- .text(s " number of circles (/ clusters), default: ${defaultParams.k}" )
67+ .text(s " number of circles (clusters), default: ${defaultParams.k}" )
7268 .action((x, c) => c.copy(k = x))
7369 opt[Int ]('n' , " n" )
7470 .text(s " number of points in smallest circle, default: ${defaultParams.numPoints}" )
7571 .action((x, c) => c.copy(numPoints = x))
7672 opt[Int ](" maxIterations" )
7773 .text(s " number of iterations, default: ${defaultParams.maxIterations}" )
7874 .action((x, c) => c.copy(maxIterations = x))
79- opt[Double ]('r' , " r" )
80- .text(s " radius of outermost circle, default: ${defaultParams.outerRadius}" )
81- .action((x, c) => c.copy(outerRadius = x))
8275 }
8376
8477 parser.parse(args, defaultParams).map { params =>
@@ -96,20 +89,21 @@ object PowerIterationClusteringExample {
9689
9790 Logger .getRootLogger.setLevel(Level .WARN )
9891
99- val circlesRdd = generateCirclesRdd(sc, params.k, params.numPoints, params.outerRadius )
92+ val circlesRdd = generateCirclesRdd(sc, params.k, params.numPoints)
10093 val model = new PowerIterationClustering ()
10194 .setK(params.k)
10295 .setMaxIterations(params.maxIterations)
96+ .setInitializationMode(" degree" )
10397 .run(circlesRdd)
10498
10599 val clusters = model.assignments.collect().groupBy(_.cluster).mapValues(_.map(_.id))
106- val assignments = clusters.toList.sortBy { case (k, v) => v.length}
100+ val assignments = clusters.toList.sortBy { case (k, v) => v.length }
107101 val assignmentsStr = assignments
108102 .map { case (k, v) =>
109103 s " $k -> ${v.sorted.mkString(" [" , " ," , " ]" )}"
110- }.mkString(" ," )
104+ }.mkString(" , " )
111105 val sizesStr = assignments.map {
112- _._2.size
106+ _._2.length
113107 }.sorted.mkString(" (" , " ," , " )" )
114108 println(s " Cluster assignments: $assignmentsStr\n cluster sizes: $sizesStr" )
115109
@@ -123,20 +117,17 @@ object PowerIterationClusteringExample {
123117 }
124118 }
125119
126- def generateCirclesRdd (sc : SparkContext ,
127- nCircles : Int = 3 ,
128- nPoints : Int = 30 ,
129- outerRadius : Double ): RDD [(Long , Long , Double )] = {
130-
131- val radii = Array .tabulate(nCircles) { cx => outerRadius / (nCircles - cx)}
132- val groupSizes = Array .tabulate(nCircles) { cx => (cx + 1 ) * nPoints}
133- val points = (0 until nCircles).flatMap { cx =>
134- generateCircle(radii(cx), groupSizes(cx))
120+ def generateCirclesRdd (
121+ sc : SparkContext ,
122+ nCircles : Int ,
123+ nPoints : Int ): RDD [(Long , Long , Double )] = {
124+ val points = (1 to nCircles).flatMap { i =>
125+ generateCircle(i, i * nPoints)
135126 }.zipWithIndex
136127 val rdd = sc.parallelize(points)
137128 val distancesRdd = rdd.cartesian(rdd).flatMap { case (((x0, y0), i0), ((x1, y1), i1)) =>
138129 if (i0 < i1) {
139- Some ((i0.toLong, i1.toLong, gaussianSimilarity((x0, y0), (x1, y1), 1.0 )))
130+ Some ((i0.toLong, i1.toLong, gaussianSimilarity((x0, y0), (x1, y1))))
140131 } else {
141132 None
142133 }
@@ -147,11 +138,9 @@ object PowerIterationClusteringExample {
147138 /**
148139 * Gaussian Similarity: http://en.wikipedia.org/wiki/Radial_basis_function_kernel
149140 */
150- def gaussianSimilarity (p1 : (Double , Double ), p2 : (Double , Double ), sigma : Double ): Double = {
151- val coeff = 1.0 / (math.sqrt(2.0 * math.Pi ) * sigma)
152- val expCoeff = - 1.0 / 2.0 * math.pow(sigma, 2.0 )
141+ def gaussianSimilarity (p1 : (Double , Double ), p2 : (Double , Double )): Double = {
153142 val ssquares = (p1._1 - p2._1) * (p1._1 - p2._1) + (p1._2 - p2._2) * (p1._2 - p2._2)
154- coeff * math.exp(expCoeff * ssquares )
143+ math.exp(- ssquares / 2.0 )
155144 }
156145}
157146
0 commit comments