From f53a0751f09efcb6fe379ddec4d57d68edbd5190 Mon Sep 17 00:00:00 2001
From: Shahid <shahidki31@gmail.com>
Date: Sun, 6 May 2018 02:28:33 +0530
Subject: [PATCH 1/4] Example code for Power Iteration Clustering

---
 .../ml/PowerIterationClusteringExample.scala  | 114 ++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
new file mode 100644
index 000000000000..b8dc47bfad85
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.log4j.{Level, Logger}
+
+// $example on$
+import org.apache.spark.ml.clustering.PowerIterationClustering
+// $example off$
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+
+
+ /**
+  * An example demonstrating power iteration clustering.
+  * Run with
+  * {{{
+  * bin/run-example ml.PowerIterationClusteringExample
+  * }}}
+  */
+
+object PowerIterationClusteringExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    // $example on$
+
+    // Generates data.
+    val radius1 = 1.0
+    val numPoints1 = 5
+    val radius2 = 4.0
+    val numPoints2 = 20
+
+    val dataset = generatePICData(spark, radius1, radius2, numPoints1, numPoints2)
+
+    // Trains a PIC model.
+    val model = new PowerIterationClustering().
+      setK(2).
+      setInitMode("degree").
+      setMaxIter(20)
+
+    val prediction = model.transform(dataset).select("id", "prediction")
+
+    //  Shows the result.
+    //  println("Cluster Assignment: ")
+    val result = prediction.collect().map {
+      row => (row(1), row(0))
+    }.groupBy(_._1).mapValues(_.map(_._2))
+
+    result.foreach {
+      case (cluster, points) => println(s"$cluster -> [${points.mkString(",")}]")
+    }
+    // $example off$
+
+    spark.stop()
+  }
+
+  def generatePICData(spark: SparkSession,
+                      r1: Double,
+                      r2: Double,
+                      n1: Int,
+                      n2: Int): DataFrame = {
+    val n = n1 + n2
+    val points = genCircle(r1, n1) ++ genCircle(r2, n2)
+
+    val rows = for (i <- 0 until n) yield {
+      val neighbors = for (j <- 0 until i) yield {
+        j.toLong
+      }
+      val similarities = for (j <- 0 until i) yield {
+        sim(points(i), points(j))
+      }
+      (i.toLong, neighbors.toArray, similarities.toArray)
+    }
+    spark.createDataFrame(rows).toDF("id", "neighbors", "similarities")
+  }
+
+  /** Generates a circle of points. */
+  private def genCircle(r: Double, n: Int): Array[(Double, Double)] = {
+    Array.tabulate(n) { i =>
+      val theta = 2.0 * math.Pi * i / n
+      (r * math.cos(theta), r * math.sin(theta))
+    }
+  }
+
+  /** Computes Gaussian similarity. */
+  private def sim(x: (Double, Double), y: (Double, Double)): Double = {
+    val dist2 = (x._1 - y._1) * (x._1 - y._1) + (x._2 - y._2) * (x._2 - y._2)
+    math.exp(-dist2 / 2.0)
+  }
+}
+
+// scalastyle:on println

From a718d4a6f7ea03ffea22eba6f55d99006c482606 Mon Sep 17 00:00:00 2001
From: Shahid <shahidki31@gmail.com>
Date: Tue, 22 May 2018 01:56:16 +0530
Subject: [PATCH 2/4] Example code for Power Iteration Clustering

---
 .../ml/PowerIterationClusteringExample.scala  | 111 +++++-------------
 1 file changed, 31 insertions(+), 80 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
index b8dc47bfad85..370e0e5270ca 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
@@ -18,13 +18,10 @@
 // scalastyle:off println
 package org.apache.spark.examples.ml
 
-import org.apache.log4j.{Level, Logger}
-
 // $example on$
 import org.apache.spark.ml.clustering.PowerIterationClustering
+import org.apache.spark.sql.SparkSession
 // $example off$
-import org.apache.spark.sql.{DataFrame, Row, SparkSession}
-
 
  /**
   * An example demonstrating power iteration clustering.
@@ -36,79 +33,33 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 
 object PowerIterationClusteringExample {
 
-  def main(args: Array[String]): Unit = {
-    val spark = SparkSession
-      .builder
-      .appName(s"${this.getClass.getSimpleName}")
-      .getOrCreate()
-
-    Logger.getRootLogger.setLevel(Level.WARN)
-
-    // $example on$
-
-    // Generates data.
-    val radius1 = 1.0
-    val numPoints1 = 5
-    val radius2 = 4.0
-    val numPoints2 = 20
-
-    val dataset = generatePICData(spark, radius1, radius2, numPoints1, numPoints2)
-
-    // Trains a PIC model.
-    val model = new PowerIterationClustering().
-      setK(2).
-      setInitMode("degree").
-      setMaxIter(20)
-
-    val prediction = model.transform(dataset).select("id", "prediction")
-
-    //  Shows the result.
-    //  println("Cluster Assignment: ")
-    val result = prediction.collect().map {
-      row => (row(1), row(0))
-    }.groupBy(_._1).mapValues(_.map(_._2))
-
-    result.foreach {
-      case (cluster, points) => println(s"$cluster -> [${points.mkString(",")}]")
-    }
-    // $example off$
-
-    spark.stop()
-  }
-
-  def generatePICData(spark: SparkSession,
-                      r1: Double,
-                      r2: Double,
-                      n1: Int,
-                      n2: Int): DataFrame = {
-    val n = n1 + n2
-    val points = genCircle(r1, n1) ++ genCircle(r2, n2)
-
-    val rows = for (i <- 0 until n) yield {
-      val neighbors = for (j <- 0 until i) yield {
-        j.toLong
-      }
-      val similarities = for (j <- 0 until i) yield {
-        sim(points(i), points(j))
-      }
-      (i.toLong, neighbors.toArray, similarities.toArray)
-    }
-    spark.createDataFrame(rows).toDF("id", "neighbors", "similarities")
-  }
-
-  /** Generates a circle of points. */
-  private def genCircle(r: Double, n: Int): Array[(Double, Double)] = {
-    Array.tabulate(n) { i =>
-      val theta = 2.0 * math.Pi * i / n
-      (r * math.cos(theta), r * math.sin(theta))
-    }
-  }
-
-  /** Computes Gaussian similarity. */
-  private def sim(x: (Double, Double), y: (Double, Double)): Double = {
-    val dist2 = (x._1 - y._1) * (x._1 - y._1) + (x._2 - y._2) * (x._2 - y._2)
-    math.exp(-dist2 / 2.0)
-  }
-}
-
-// scalastyle:on println
+   def main(args: Array[String]): Unit = {
+     val spark = SparkSession
+       .builder
+       .appName(s"${this.getClass.getSimpleName}")
+       .getOrCreate()
+
+     // $example on$
+     val dataset = spark.createDataFrame(Seq(
+       (0L, Array(1L, 2L, 4L), Array(0.9, 0.9, 0.1)),
+       (1L, Array(0L, 2L), Array(0.9, 0.9)),
+       (2L, Array(0L, 1L), Array(0.9, 0.9)),
+       (3L, Array(4L), Array(0.9)),
+       (4L, Array(0L, 3L), Array(0.1, 0.9))
+     )).toDF("id", "neighbors", "similarities")
+
+     // Trains a PIC model.
+     val model = new PowerIterationClustering().
+       setK(2).
+       setInitMode("degree").
+       setMaxIter(20)
+
+     val prediction = model.transform(dataset).select("id", "prediction")
+
+     //  Shows the cluster assignment
+     prediction.show()
+     // $example off$
+
+     spark.stop()
+   }
+ }

From d0054939562ff822c2a0581af2a616f61a82e131 Mon Sep 17 00:00:00 2001
From: Shahid <shahidki31@gmail.com>
Date: Fri, 8 Jun 2018 00:22:28 +0530
Subject: [PATCH 3/4] Example code for Power Iteration Clustering

---
 .../ml/PowerIterationClusteringExample.scala    | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
index 370e0e5270ca..b9c7c97c37ee 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
@@ -41,20 +41,21 @@ object PowerIterationClusteringExample {
 
      // $example on$
      val dataset = spark.createDataFrame(Seq(
-       (0L, Array(1L, 2L, 4L), Array(0.9, 0.9, 0.1)),
-       (1L, Array(0L, 2L), Array(0.9, 0.9)),
-       (2L, Array(0L, 1L), Array(0.9, 0.9)),
-       (3L, Array(4L), Array(0.9)),
-       (4L, Array(0L, 3L), Array(0.1, 0.9))
-     )).toDF("id", "neighbors", "similarities")
+       (0L, 1L, 1.0),
+       (0L, 2L, 1.0),
+       (1L, 2L, 1.0),
+       (3L, 4L, 1.0),
+       (4L, 0L, 0.1)
+     )).toDF("src", "dst", "weight")
 
      // Trains a PIC model.
      val model = new PowerIterationClustering().
        setK(2).
+       setMaxIter(20).
        setInitMode("degree").
-       setMaxIter(20)
+       setWeightCol("weight")
 
-     val prediction = model.transform(dataset).select("id", "prediction")
+     val prediction = model.assignClusters(dataset).select("id", "cluster")
 
      //  Shows the cluster assignment
      prediction.show()

From 6b75b8c20d862dba3f7679833a081296d2a2f8a3 Mon Sep 17 00:00:00 2001
From: Shahid <shahidki31@gmail.com>
Date: Fri, 8 Jun 2018 01:38:50 +0530
Subject: [PATCH 4/4] Example code for Power Iteration Clustering

---
 .../ml/PowerIterationClusteringExample.scala       | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
index b9c7c97c37ee..ca8f7affb14e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
@@ -20,19 +20,10 @@ package org.apache.spark.examples.ml
 
 // $example on$
 import org.apache.spark.ml.clustering.PowerIterationClustering
-import org.apache.spark.sql.SparkSession
 // $example off$
-
- /**
-  * An example demonstrating power iteration clustering.
-  * Run with
-  * {{{
-  * bin/run-example ml.PowerIterationClusteringExample
-  * }}}
-  */
+import org.apache.spark.sql.SparkSession
 
 object PowerIterationClusteringExample {
-
    def main(args: Array[String]): Unit = {
      val spark = SparkSession
        .builder
@@ -48,7 +39,6 @@ object PowerIterationClusteringExample {
        (4L, 0L, 0.1)
      )).toDF("src", "dst", "weight")
 
-     // Trains a PIC model.
      val model = new PowerIterationClustering().
        setK(2).
        setMaxIter(20).
@@ -58,7 +48,7 @@ object PowerIterationClusteringExample {
      val prediction = model.assignClusters(dataset).select("id", "cluster")
 
      //  Shows the cluster assignment
-     prediction.show()
+     prediction.show(false)
      // $example off$
 
      spark.stop()