From 91fc11639d56cc3f283e383e589808b6ab9bd0f7 Mon Sep 17 00:00:00 2001
From: Lu WANG
Date: Mon, 4 Jun 2018 12:16:37 -0700
Subject: [PATCH 1/2] change the way to generate the random seed in LDASuite

---
 .../test/scala/org/apache/spark/ml/clustering/LDASuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
index 4d848205034c0..5c26d2a12f74a 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -36,9 +36,9 @@ object LDASuite {
       vocabSize: Int): DataFrame = {
     val avgWC = 1 // average instances of each word in a doc
     val sc = spark.sparkContext
-    val rng = new java.util.Random()
-    rng.setSeed(1)
     val rdd = sc.parallelize(1 to rows).map { i =>
+      val rng = new java.util.Random()
+      rng.setSeed(i)
       Vectors.dense(Array.fill(vocabSize)(rng.nextInt(2 * avgWC).toDouble))
     }.map(v => new TestRow(v))
     spark.createDataFrame(rdd)

From fa2c42261a20a9c2a980889c175dd320c4c5d836 Mon Sep 17 00:00:00 2001
From: Lu WANG
Date: Mon, 4 Jun 2018 15:24:27 -0700
Subject: [PATCH 2/2] minor fix for setting random seed

---
 .../test/scala/org/apache/spark/ml/clustering/LDASuite.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
index 5c26d2a12f74a..bed087d0e0adb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -37,8 +37,7 @@ object LDASuite {
     val avgWC = 1 // average instances of each word in a doc
     val sc = spark.sparkContext
     val rdd = sc.parallelize(1 to rows).map { i =>
-      val rng = new java.util.Random()
-      rng.setSeed(i)
+      val rng = new java.util.Random(i)
       Vectors.dense(Array.fill(vocabSize)(rng.nextInt(2 * avgWC).toDouble))
     }.map(v => new TestRow(v))
     spark.createDataFrame(rdd)