|
17 | 17 |
|
18 | 18 | package org.apache.spark.mllib.fpm |
19 | 19 |
|
20 | | -import scala.collection.mutable.ArrayBuffer |
21 | | - |
22 | 20 | import org.apache.spark.Logging |
23 | 21 | import org.apache.spark.annotation.Experimental |
24 | 22 | import org.apache.spark.rdd.RDD |
@@ -50,7 +48,7 @@ class PrefixSpan private ( |
50 | 48 | * projected database exceeds this size, another iteration of distributed PrefixSpan is run. |
51 | 49 | */ |
52 | 50 | // TODO: make configurable with a better default value; the old default of 10000 may be too small |
53 | | - private val maxLocalProjDBSize: Long = 10000 |
| 51 | + private val maxLocalProjDBSize: Long = 32000000L |
54 | 52 |
|
55 | 53 | /** |
56 | 54 | * Constructs a default instance with default parameters |
@@ -121,7 +119,7 @@ class PrefixSpan private ( |
121 | 119 | freqItems.flatMap { item => |
122 | 120 | val candidateSuffix = LocalPrefixSpan.getSuffix(List(item), filteredSeq)._2 |
123 | 121 | candidateSuffix match { |
124 | | - case suffix if !suffix.isEmpty => Some((List(item), suffix)) |
| 122 | + case suffix if suffix.nonEmpty => Some((List(item), suffix)) |
125 | 123 | case _ => None |
126 | 124 | } |
127 | 125 | } |
@@ -198,7 +196,7 @@ class PrefixSpan private ( |
198 | 196 | // Every (prefix :+ suffix) is guaranteed to have support exceeding `minSupport` |
199 | 197 | val prefixItemPairAndCounts = prefixSuffixPairs |
200 | 198 | .flatMap { case (prefix, suffix) => |
201 | | - suffix.distinct.filter(_ != -1).map(y => ((prefix, y), 1L)) } |
| 199 | + suffix.distinct.filter(item => item != -1 && item != -3).map(y => ((prefix, y), 1L)) } |
202 | 200 | .reduceByKey(_ + _) |
203 | 201 | .filter(_._2 >= minCount) |
204 | 202 |
|
|
0 commit comments