Skip to content

Commit f486dcd

Browse files
committed
change maxLocalProjDBSize and fix a bug (remove -3 from frequent items).
1 parent 60a0b76 commit f486dcd

File tree

1 file changed

+3 -5 lines changed

1 file changed

+3
-5
lines changed

mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.spark.mllib.fpm
1919

20-
import scala.collection.mutable.ArrayBuffer
21-
2220
import org.apache.spark.Logging
2321
import org.apache.spark.annotation.Experimental
2422
import org.apache.spark.rdd.RDD
@@ -50,7 +48,7 @@ class PrefixSpan private (
5048
* projected database exceeds this size, another iteration of distributed PrefixSpan is run.
5149
*/
5250
// TODO: make configurable with a better default value, 10000 may be too small
53-
private val maxLocalProjDBSize: Long = 10000
51+
private val maxLocalProjDBSize: Long = 32000000L
5452

5553
/**
5654
* Constructs a default instance with default parameters
@@ -121,7 +119,7 @@ class PrefixSpan private (
121119
freqItems.flatMap { item =>
122120
val candidateSuffix = LocalPrefixSpan.getSuffix(List(item), filteredSeq)._2
123121
candidateSuffix match {
124-
case suffix if !suffix.isEmpty => Some((List(item), suffix))
122+
case suffix if suffix.nonEmpty => Some((List(item), suffix))
125123
case _ => None
126124
}
127125
}
@@ -198,7 +196,7 @@ class PrefixSpan private (
198196
// Every (prefix :+ suffix) is guaranteed to have support exceeding `minSupport`
199197
val prefixItemPairAndCounts = prefixSuffixPairs
200198
.flatMap { case (prefix, suffix) =>
201-
suffix.distinct.filter(_ != -1).map(y => ((prefix, y), 1L)) }
199+
suffix.distinct.filter(item => item != -1 && item != -3).map(y => ((prefix, y), 1L)) }
202200
.reduceByKey(_ + _)
203201
.filter(_._2 >= minCount)
204202

0 commit comments

Comments
 (0)