From f41975efdbe1794fe456babb7f79d0290ecfc70d Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Fri, 30 Oct 2015 16:05:20 -0700
Subject: [PATCH 1/6] Parallel personalized pagerank implementation

---
 graphx/pom.xml                                | 21 +++++
 .../org/apache/spark/graphx/GraphOps.scala    | 12 +++
 .../apache/spark/graphx/lib/PageRank.scala    | 79 +++++++++++++++++++
 .../spark/graphx/lib/PageRankSuite.scala      | 20 +++++
 4 files changed, 132 insertions(+)
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 987b831021a54..cfc259d96cb7e 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -70,6 +70,27 @@
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.scalanlp</groupId>
+      <artifactId>breeze_${scala.binary.version}</artifactId>
+      <version>0.11.2</version>
+      <exclusions>
+        <!-- This is included as a compile-scoped dependency by jtransforms, which is
+             a dependency of breeze. -->
+        <exclusion>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-math3</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math3</artifactId>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 9451ff1e5c0e2..318e5d47bbf56 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -26,6 +26,8 @@ import org.apache.spark.rdd.RDD
 
 import org.apache.spark.graphx.lib._
 
+import breeze.linalg.SparseVector
+
 /**
  * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
  * efficient GraphX API. This class is implicitly constructed for each Graph object.
@@ -384,6 +386,16 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
     PageRank.runUntilConvergenceWithOptions(graph, tol, resetProb, Some(src))
   }
 
+  /**
+   * Run parallel personalized PageRank for a given array of source vertices, such
+   * that all random walks are started relative to the source vertices
+   */
+  def staticParallelPersonalizedPageRank(sources : Array[VertexId], numIter: Int,
+    resetProb: Double = 0.15) : Graph[SparseVector[Double], Double] = {
+    PageRank.runParallelPersonalizedPageRank(graph, numIter, resetProb, sources)
+  }
+
+
   /**
    * Run Personalized PageRank for a fixed number of iterations with
    * with all iterations originating at the source node
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 52b237fc15093..ecf3f55d925d9 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -23,6 +23,8 @@ import scala.language.postfixOps
 import org.apache.spark.Logging
 import org.apache.spark.graphx._
 
+import breeze.linalg.SparseVector
+
 /**
  * PageRank algorithm implementation. There are two implementations of PageRank implemented.
  *
@@ -158,6 +160,83 @@ object PageRank extends Logging {
     rankGraph
   }
 
+  /**
+   * Run Personalized PageRank for a fixed number of iterations, for a
+   * set of starting nodes in parallel. Returns a graph with vertex attributes
+   * containing the pagerank relative to all starting nodes (as a sparse vector) and
+   * edge attributes the normalized edge weight
+   *
+   * @tparam VD The original vertex attribute (not used)
+   * @tparam ED The original edge attribute (not used)
+   *
+   * @param graph The graph on which to compute personalized pagerank
+   * @param numIter The number of iterations to run
+   * @param resetProb The random reset probability
+   * @param sources The source vertices; index i of each result vector is relative to sources(i)
+   * @return the graph with vertex attributes
+   *         containing the pagerank relative to all starting nodes (as a sparse vector) and
+   *         edge attributes the normalized edge weight
+   */
+  def runParallelPersonalizedPageRank[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED],
+    numIter: Int, resetProb: Double = 0.15,
+    sources : Array[VertexId]): Graph[SparseVector[Double], Double] =
+  {
+    // TODO if one sources vertex id is outside of the int range
+    // we won't be able to store its activations in a sparse vector
+    val zero = new SparseVector[Double](Array(), Array(), sources.size)
+    val sourcesInitMap = sources.zipWithIndex.map{case (vid, i) => {
+      val v = new SparseVector[Double](Array(i), Array(resetProb), sources.size)
+      (vid, v)
+    }}.toMap
+    val sc = graph.vertices.sparkContext
+    val sourcesInitMapBC = sc.broadcast(sourcesInitMap)
+    // Initialize the PageRank graph with each edge attribute having
+    // weight 1/outDegree and each source vertex with activation resetProb at its source index.
+    var rankGraph = graph
+      // Associate the degree with each vertex
+      .outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
+      // Set the weight on the edges based on the degree
+      .mapTriplets( e => 1.0 / e.srcAttr, TripletFields.Src )
+      .mapVertices( (vid, attr) => {
+      if (sourcesInitMapBC.value contains vid) {
+        sourcesInitMapBC.value(vid)
+      } else {
+        zero
+      }
+    })
+
+    var i = 0
+    while (i < numIter) {
+      val prevRankGraph = rankGraph
+      // Propagates the message along outbound edges
+      // and adding start nodes back in with activation resetProb
+      val rankUpdates = rankGraph.aggregateMessages[SparseVector[Double]](
+        ctx => ctx.sendToDst(ctx.srcAttr :* ctx.attr),
+        (a : SparseVector[Double], b : SparseVector[Double]) => a :+ b, TripletFields.Src)
+
+      rankGraph = rankGraph.joinVertices(rankUpdates) {
+        (vid, oldRank, msgSum) => {
+          val popActivations : SparseVector[Double] = msgSum :* (1.0 - resetProb)
+          val resetActivations = if (sourcesInitMapBC.value contains vid) {
+            sourcesInitMapBC.value(vid)
+          } else {
+            zero
+          }
+          popActivations :+ resetActivations
+        }}.cache()
+
+      rankGraph.edges.foreachPartition(x => {}) // also materializes rankGraph.vertices
+      prevRankGraph.vertices.unpersist(false)
+      prevRankGraph.edges.unpersist(false)
+
+      logInfo(s"Parallel Personalized PageRank finished iteration $i.")
+
+      i += 1
+    }
+
+    rankGraph
+  }
+
   /**
    * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
    * PageRank and edge attributes containing the normalized edge weight.
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
index bdff31446f8ee..588db51474db9 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
@@ -118,11 +118,26 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       val dynamicRanks = starGraph.personalizedPageRank(0, 0, resetProb).vertices.cache()
       assert(compareRanks(staticRanks2, dynamicRanks) < errorTol)
 
+      val parallelStaticRanks1 = starGraph.staticParallelPersonalizedPageRank(Array(0), 1, resetProb).mapVertices{
+        case (vertexId, vector) => vector(0)
+      }.vertices.cache()
+      assert(compareRanks(staticRanks1, parallelStaticRanks1) < errorTol)
+
+      val parallelStaticRanks2 = starGraph.staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+        case (vertexId, vector) => vector(0)
+      }.vertices.cache()
+      assert(compareRanks(staticRanks2, parallelStaticRanks2) < errorTol)
+
       // We have one outbound edge from 1 to 0
       val otherStaticRanks2 = starGraph.staticPersonalizedPageRank(1, numIter = 2, resetProb)
         .vertices.cache()
       val otherDynamicRanks = starGraph.personalizedPageRank(1, 0, resetProb).vertices.cache()
+      val otherParallelStaticRanks2 = starGraph.staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+        case (vertexId, vector) => vector(1)
+      }.vertices.cache()
       assert(compareRanks(otherDynamicRanks, otherStaticRanks2) < errorTol)
+      assert(compareRanks(otherStaticRanks2, otherParallelStaticRanks2) < errorTol)
+      assert(compareRanks(otherDynamicRanks, otherParallelStaticRanks2) < errorTol)
     }
   } // end of test Star PersonalPageRank
 
@@ -177,6 +192,11 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       val dynamicRanks = chain.personalizedPageRank(4, tol, resetProb).vertices
 
       assert(compareRanks(staticRanks, dynamicRanks) < errorTol)
+
+      val parallelStaticRanks = chain.staticParallelPersonalizedPageRank(Array(4), numIter, resetProb).mapVertices{
+        case (vertexId, vector) => vector(0)
+      }.vertices.cache()
+      assert(compareRanks(staticRanks, parallelStaticRanks) < errorTol)
     }
   }
 }

From 3605e40e8e61c853c89637918c82c7e0bcec69e8 Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Wed, 4 Nov 2015 09:39:05 -0800
Subject: [PATCH 2/6] Scala style tweaks

---
 .../spark/graphx/lib/PageRankSuite.scala      | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
index 588db51474db9..5306891ec4e9d 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
@@ -118,23 +118,26 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       val dynamicRanks = starGraph.personalizedPageRank(0, 0, resetProb).vertices.cache()
       assert(compareRanks(staticRanks2, dynamicRanks) < errorTol)
 
-      val parallelStaticRanks1 = starGraph.staticParallelPersonalizedPageRank(Array(0), 1, resetProb).mapVertices{
-        case (vertexId, vector) => vector(0)
-      }.vertices.cache()
+      val parallelStaticRanks1 = starGraph
+        .staticParallelPersonalizedPageRank(Array(0), 1, resetProb).mapVertices{
+          case (vertexId, vector) => vector(0)
+        }.vertices.cache()
       assert(compareRanks(staticRanks1, parallelStaticRanks1) < errorTol)
 
-      val parallelStaticRanks2 = starGraph.staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
-        case (vertexId, vector) => vector(0)
-      }.vertices.cache()
+      val parallelStaticRanks2 = starGraph
+        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+          case (vertexId, vector) => vector(0)
+        }.vertices.cache()
       assert(compareRanks(staticRanks2, parallelStaticRanks2) < errorTol)
 
       // We have one outbound edge from 1 to 0
       val otherStaticRanks2 = starGraph.staticPersonalizedPageRank(1, numIter = 2, resetProb)
         .vertices.cache()
       val otherDynamicRanks = starGraph.personalizedPageRank(1, 0, resetProb).vertices.cache()
-      val otherParallelStaticRanks2 = starGraph.staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
-        case (vertexId, vector) => vector(1)
-      }.vertices.cache()
+      val otherParallelStaticRanks2 = starGraph
+        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+          case (vertexId, vector) => vector(1)
+        }.vertices.cache()
       assert(compareRanks(otherDynamicRanks, otherStaticRanks2) < errorTol)
       assert(compareRanks(otherStaticRanks2, otherParallelStaticRanks2) < errorTol)
       assert(compareRanks(otherDynamicRanks, otherParallelStaticRanks2) < errorTol)
@@ -193,9 +196,10 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
 
       assert(compareRanks(staticRanks, dynamicRanks) < errorTol)
 
-      val parallelStaticRanks = chain.staticParallelPersonalizedPageRank(Array(4), numIter, resetProb).mapVertices{
-        case (vertexId, vector) => vector(0)
-      }.vertices.cache()
+      val parallelStaticRanks = chain
+        .staticParallelPersonalizedPageRank(Array(4), numIter, resetProb).mapVertices{
+          case (vertexId, vector) => vector(0)
+        }.vertices.cache()
       assert(compareRanks(staticRanks, parallelStaticRanks) < errorTol)
     }
   }

From 8b34e5ce0fd25423349c4ded45859ef3a02153cf Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Mon, 23 Nov 2015 17:26:43 -0800
Subject: [PATCH 3/6] Removing breeze dependency from mllib (available through
 graphx)

---
 mllib/pom.xml | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/mllib/pom.xml b/mllib/pom.xml
index 70139121d8c78..712603ede5a19 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -68,23 +68,6 @@
       <version>${jblas.version}</version>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.scalanlp</groupId>
-      <artifactId>breeze_${scala.binary.version}</artifactId>
-      <version>0.11.2</version>
-      <exclusions>
-        <!-- This is included as a compile-scoped dependency by jtransforms, which is
-             a dependency of breeze. -->
-        <exclusion>
-          <groupId>junit</groupId>
-          <artifactId>junit</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.commons</groupId>
-          <artifactId>commons-math3</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>

From 508ba451eb636365388fc5bf77b531ce1d3d70d4 Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Mon, 23 Nov 2015 17:29:02 -0800
Subject: [PATCH 4/6] Renaming SparseVector to BSV

---
 .../scala/org/apache/spark/graphx/GraphOps.scala   |  4 ++--
 .../org/apache/spark/graphx/lib/PageRank.scala     | 14 +++++++-------
 .../apache/spark/graphx/lib/PageRankSuite.scala    |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 318e5d47bbf56..5e1468d62e878 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD
 
 import org.apache.spark.graphx.lib._
 
-import breeze.linalg.SparseVector
+import breeze.linalg.{SparseVector => BSV}
 
 /**
  * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
@@ -391,7 +391,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
    * that all random walks are started relative to the source vertices
    */
   def staticParallelPersonalizedPageRank(sources : Array[VertexId], numIter: Int,
-    resetProb: Double = 0.15) : Graph[SparseVector[Double], Double] = {
+    resetProb: Double = 0.15) : Graph[BSV[Double], Double] = {
     PageRank.runParallelPersonalizedPageRank(graph, numIter, resetProb, sources)
   }
 
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index ecf3f55d925d9..339c4c6a25d0b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -23,7 +23,7 @@ import scala.language.postfixOps
 import org.apache.spark.Logging
 import org.apache.spark.graphx._
 
-import breeze.linalg.SparseVector
+import breeze.linalg.{SparseVector => BSV}
 
 /**
  * PageRank algorithm implementation. There are two implementations of PageRank implemented.
@@ -179,13 +179,13 @@ object PageRank extends Logging {
    */
   def runParallelPersonalizedPageRank[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED],
     numIter: Int, resetProb: Double = 0.15,
-    sources : Array[VertexId]): Graph[SparseVector[Double], Double] =
+    sources : Array[VertexId]): Graph[BSV[Double], Double] =
   {
     // TODO if one sources vertex id is outside of the int range
     // we won't be able to store its activations in a sparse vector
-    val zero = new SparseVector[Double](Array(), Array(), sources.size)
+    val zero = new BSV[Double](Array(), Array(), sources.size)
     val sourcesInitMap = sources.zipWithIndex.map{case (vid, i) => {
-      val v = new SparseVector[Double](Array(i), Array(resetProb), sources.size)
+      val v = new BSV[Double](Array(i), Array(resetProb), sources.size)
       (vid, v)
     }}.toMap
     val sc = graph.vertices.sparkContext
@@ -210,13 +210,13 @@ object PageRank extends Logging {
       val prevRankGraph = rankGraph
       // Propagates the message along outbound edges
       // and adding start nodes back in with activation resetProb
-      val rankUpdates = rankGraph.aggregateMessages[SparseVector[Double]](
+      val rankUpdates = rankGraph.aggregateMessages[BSV[Double]](
         ctx => ctx.sendToDst(ctx.srcAttr :* ctx.attr),
-        (a : SparseVector[Double], b : SparseVector[Double]) => a :+ b, TripletFields.Src)
+        (a : BSV[Double], b : BSV[Double]) => a :+ b, TripletFields.Src)
 
       rankGraph = rankGraph.joinVertices(rankUpdates) {
         (vid, oldRank, msgSum) => {
-          val popActivations : SparseVector[Double] = msgSum :* (1.0 - resetProb)
+          val popActivations : BSV[Double] = msgSum :* (1.0 - resetProb)
           val resetActivations = if (sourcesInitMapBC.value contains vid) {
             sourcesInitMapBC.value(vid)
           } else {
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
index 5306891ec4e9d..a0e6d3fe129b6 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
@@ -119,7 +119,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       assert(compareRanks(staticRanks2, dynamicRanks) < errorTol)
 
       val parallelStaticRanks1 = starGraph
-        .staticParallelPersonalizedPageRank(Array(0), 1, resetProb).mapVertices{
+        .staticParallelPersonalizedPageRank(Array(0), 1, resetProb).mapVertices {
           case (vertexId, vector) => vector(0)
         }.vertices.cache()
       assert(compareRanks(staticRanks1, parallelStaticRanks1) < errorTol)

From 09d31c852e8213a2923de68b6e3f2b3ec6f71166 Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Mon, 23 Nov 2015 17:30:39 -0800
Subject: [PATCH 5/6] Removing extra space, extra line

---
 graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 5e1468d62e878..e97b1ceb29c08 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -390,12 +390,11 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
    * Run parallel personalized PageRank for a given array of source vertices, such
    * that all random walks are started relative to the source vertices
    */
-  def staticParallelPersonalizedPageRank(sources : Array[VertexId], numIter: Int,
+  def staticParallelPersonalizedPageRank(sources: Array[VertexId], numIter: Int,
     resetProb: Double = 0.15) : Graph[BSV[Double], Double] = {
     PageRank.runParallelPersonalizedPageRank(graph, numIter, resetProb, sources)
   }
 
-
   /**
    * Run Personalized PageRank for a fixed number of iterations with
    * with all iterations originating at the source node

From 85063535bd29936c260844f1fd815faccb474169 Mon Sep 17 00:00:00 2001
From: Yves Raimond <yraimond@netflix.com>
Date: Mon, 23 Nov 2015 17:39:41 -0800
Subject: [PATCH 6/6] Code-style changes

---
 .../apache/spark/graphx/lib/PageRank.scala    | 29 +++++++++----------
 .../spark/graphx/lib/PageRankSuite.scala      |  6 ++--
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 339c4c6a25d0b..cd34061415ec9 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -179,15 +179,14 @@ object PageRank extends Logging {
    */
   def runParallelPersonalizedPageRank[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED],
     numIter: Int, resetProb: Double = 0.15,
-    sources : Array[VertexId]): Graph[BSV[Double], Double] =
-  {
+    sources: Array[VertexId]): Graph[BSV[Double], Double] = {
     // TODO if one sources vertex id is outside of the int range
     // we won't be able to store its activations in a sparse vector
     val zero = new BSV[Double](Array(), Array(), sources.size)
-    val sourcesInitMap = sources.zipWithIndex.map{case (vid, i) => {
+    val sourcesInitMap = sources.zipWithIndex.map { case (vid, i) =>
       val v = new BSV[Double](Array(i), Array(resetProb), sources.size)
       (vid, v)
-    }}.toMap
+    }.toMap
     val sc = graph.vertices.sparkContext
     val sourcesInitMapBC = sc.broadcast(sourcesInitMap)
     // Initialize the PageRank graph with each edge attribute having
@@ -196,14 +195,14 @@ object PageRank extends Logging {
       // Associate the degree with each vertex
       .outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
       // Set the weight on the edges based on the degree
-      .mapTriplets( e => 1.0 / e.srcAttr, TripletFields.Src )
-      .mapVertices( (vid, attr) => {
-      if (sourcesInitMapBC.value contains vid) {
-        sourcesInitMapBC.value(vid)
-      } else {
-        zero
+      .mapTriplets(e => 1.0 / e.srcAttr, TripletFields.Src)
+      .mapVertices { (vid, attr) =>
+        if (sourcesInitMapBC.value contains vid) {
+          sourcesInitMapBC.value(vid)
+        } else {
+          zero
+        }
       }
-    })
 
     var i = 0
     while (i < numIter) {
@@ -212,18 +211,18 @@ object PageRank extends Logging {
       // and adding start nodes back in with activation resetProb
       val rankUpdates = rankGraph.aggregateMessages[BSV[Double]](
         ctx => ctx.sendToDst(ctx.srcAttr :* ctx.attr),
-        (a : BSV[Double], b : BSV[Double]) => a :+ b, TripletFields.Src)
+        (a: BSV[Double], b: BSV[Double]) => a :+ b, TripletFields.Src)
 
       rankGraph = rankGraph.joinVertices(rankUpdates) {
-        (vid, oldRank, msgSum) => {
-          val popActivations : BSV[Double] = msgSum :* (1.0 - resetProb)
+        (vid, oldRank, msgSum) =>
+          val popActivations: BSV[Double] = msgSum :* (1.0 - resetProb)
           val resetActivations = if (sourcesInitMapBC.value contains vid) {
             sourcesInitMapBC.value(vid)
           } else {
             zero
           }
           popActivations :+ resetActivations
-        }}.cache()
+        }.cache()
 
       rankGraph.edges.foreachPartition(x => {}) // also materializes rankGraph.vertices
       prevRankGraph.vertices.unpersist(false)
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
index a0e6d3fe129b6..b6305c8d00aba 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
@@ -125,7 +125,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       assert(compareRanks(staticRanks1, parallelStaticRanks1) < errorTol)
 
       val parallelStaticRanks2 = starGraph
-        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices {
           case (vertexId, vector) => vector(0)
         }.vertices.cache()
       assert(compareRanks(staticRanks2, parallelStaticRanks2) < errorTol)
@@ -135,7 +135,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
         .vertices.cache()
       val otherDynamicRanks = starGraph.personalizedPageRank(1, 0, resetProb).vertices.cache()
       val otherParallelStaticRanks2 = starGraph
-        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices{
+        .staticParallelPersonalizedPageRank(Array(0, 1), 2, resetProb).mapVertices {
           case (vertexId, vector) => vector(1)
         }.vertices.cache()
       assert(compareRanks(otherDynamicRanks, otherStaticRanks2) < errorTol)
@@ -197,7 +197,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext {
       assert(compareRanks(staticRanks, dynamicRanks) < errorTol)
 
       val parallelStaticRanks = chain
-        .staticParallelPersonalizedPageRank(Array(4), numIter, resetProb).mapVertices{
+        .staticParallelPersonalizedPageRank(Array(4), numIter, resetProb).mapVertices {
           case (vertexId, vector) => vector(0)
         }.vertices.cache()
       assert(compareRanks(staticRanks, parallelStaticRanks) < errorTol)