
Commit 7b77ad7

Jacky Li committed
fix scalastyle check
1 parent f68a0bd commit 7b77ad7

3 files changed: +32 -46 lines changed

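All three diffs below are mechanical cleanups of the kind Spark's scalastyle check flags: a space after the // comment marker, sorted import groups, and expression results in place of explicit return statements, plus removal of commented-out lines. An illustrative before/after pair (my own sketch, not taken from the patch):

// Before: fails the style check
//sum the values
def total(xs: Seq[Int]): Int = {
  return xs.sum
}

// After: passes
// sum the values
def total(xs: Seq[Int]): Int = xs.sum // the last expression is the result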

mllib/src/main/scala/org/apache/spark/mllib/fim/AprioriByBroadcast.scala

Lines changed: 11 additions & 13 deletions
@@ -36,13 +36,13 @@ object AprioriByBroadcast extends Logging with Serializable {
    * @return single item in data Set
    */
   def createC1(dataSet: RDD[Array[String]]): Array[Array[String]] = {
-    //get all distinct item in the RDD
+    // get all distinct item in the RDD
     val itemCollection = dataSet.flatMap(line => line).distinct().collect()

-    //define new array which item is an array form
+    // define new array which item is an array form
     val itemArrCollection = collection.mutable.ArrayBuffer[Array[String]]()

-    //change the itemsCollection into itemArrCollection
+    // change the itemsCollection into itemArrCollection
     for (item <- itemCollection) {
       itemArrCollection += Array[String](item)
     }
@@ -62,10 +62,8 @@ object AprioriByBroadcast extends Logging with Serializable {
       Ck: Array[Array[String]],
       minCount: Double,
       sc: SparkContext): Array[(Array[String], Int)] = {
-    //broadcast Ck
+    // broadcast Ck
     val broadcastCk = sc.broadcast(Ck)
-    //val broadcastCkList: Array[Array[String]] = broadcastCk.value
-
     val Lk = dataSet.flatMap(line => containCk(line, broadcastCk))
       .filter(_.length > 0)
       .map(v => (v, 1))
@@ -122,7 +120,7 @@ object AprioriByBroadcast extends Logging with Serializable {
     val LkLen = Lk.length
     val CkBuffer = collection.mutable.ArrayBuffer[Array[String]]()

-    //get Ck from Lk
+    // get Ck from Lk
     for (i <- 0 to LkLen - 1)
       for (j <- i + 1 to LkLen - 1) {
         // get Lk:k-2 before k-2 item
@@ -183,16 +181,16 @@ object AprioriByBroadcast extends Logging with Serializable {
       minSupport: Double,
       sc: SparkContext): Array[(Set[String], Int)] = {

-    //dataSet length
+    // dataSet length
     val dataSetLen: Long = dataSet.count()
-    //the count line for minSupport
+    // the count line for minSupport
     val minCount = minSupport * dataSetLen

-    //definite L collection that using save all of frequent item set
+    // definite L collection that using save all of frequent item set
     val L = collection.mutable.ArrayBuffer[Array[(Array[String], Int)]]()
     val FIS = collection.mutable.ArrayBuffer[(Set[String], Int)]()

-    //call aprioriStepOne method to get L1
+    // call aprioriStepOne method to get L1
     val L1: Array[(Array[String], Int)] = aprioriStepOne(dataSet, minCount)
     logDebug("L1 length:" + L1.length)
     logDebug("L1:" + L1)
@@ -210,11 +208,11 @@ object AprioriByBroadcast extends Logging with Serializable {
     // do the loop while the k > 0 and L length > 1
     while ((k > 0) && L(k - 2).length > 1) {

-      //call createCk method to get Ck
+      // call createCk method to get Ck
      val Ck: Array[Array[String]] = aprioriGen(L(k - 2), k)

      if (Ck != null) {
-        //call createLk method to get Lk
+        // call createLk method to get Lk
        val Lk: Array[(Array[String], Int)] =
          scanD(
            dataSet,
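Taken together, these hunks touch the driver that finds frequent itemsets by broadcasting each candidate set Ck to the executors. A hedged usage sketch, assuming the public entry point is named apriori with the (dataSet, minSupport, sc) signature visible above (the method name itself lies outside the diff context):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fim.AprioriByBroadcast

val sc = new SparkContext(new SparkConf().setAppName("apriori-demo").setMaster("local[2]"))
// one Array[String] per transaction, matching the RDD[Array[String]] input above
val transactions = sc.parallelize(Seq(
  Array("a", "b", "c"),
  Array("a", "b"),
  Array("b", "c")))
// minSupport = 0.5 keeps itemsets appearing in at least half the transactions
val frequent: Array[(Set[String], Int)] = AprioriByBroadcast.apriori(transactions, 0.5, sc)
frequent.foreach { case (items, count) => println(items.mkString(",") + " -> " + count) }
sc.stop()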

mllib/src/main/scala/org/apache/spark/mllib/fim/AprioriByCartesian.scala

Lines changed: 4 additions & 4 deletions
@@ -91,14 +91,14 @@ object AprioriByCartesian extends Logging with Serializable {
       minSupport: Double,
       sc: SparkContext): Array[(Set[String], Int)] = {

-    //dataSet length
+    // dataSet length
     val dataSetLen: Long = input.count()
-    //the count line for minSupport
+    // the count line for minSupport
     val minCount = minSupport * dataSetLen
     // This algorithm finds frequent item set, so convert each element of RDD to set
     val dataSet = input.map(_.toSet)

-    //definite L collection that using save all of frequent item set
+    // definite L collection that using save all of frequent item set
     val L = collection.mutable.ArrayBuffer[RDD[(Set[String], Int)]]()

     val L1: RDD[(Set[String], Int)] = aprioriStepOne(dataSet, minCount)
@@ -121,7 +121,7 @@ object AprioriByCartesian extends Logging with Serializable {
       k = k + 1
       L += Lk
     }
-    //return all result in L
+    // return all result in L
     val retArr = collection.mutable.ArrayBuffer[(Set[String], Int)]()
     for (l <- L) {
       retArr.appendAll(l.collect())
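Unlike the broadcast variant, this file builds candidate k-itemsets by joining the RDD of frequent (k-1)-itemsets with itself. A minimal sketch of that generation idea under the types used above (illustrative only; this is not the file's own aprioriGen):

import org.apache.spark.rdd.RDD

// union pairs of frequent (k-1)-itemsets and keep those that grow to exactly k items
def candidateK(lkMinus1: RDD[(Set[String], Int)], k: Int): RDD[Set[String]] =
  lkMinus1.map(_._1)
    .cartesian(lkMinus1.map(_._1))
    .map { case (a, b) => a ++ b }
    .filter(_.size == k)
    .distinct()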

mllib/src/main/scala/org/apache/spark/mllib/fim/FPGrowth.scala

Lines changed: 17 additions & 29 deletions
@@ -17,10 +17,12 @@

 package org.apache.spark.mllib.fim

-import org.apache.spark.{Logging, SparkContext}
 import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.RDD
 import org.apache.spark.broadcast._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.{Logging, SparkContext}
+
+import scala.collection.mutable.{ArrayBuffer, Map}

 /**
  * calculate frequent item set using FPGrowth algorithm with dada set and minSupport
@@ -47,18 +49,15 @@ class FPGrowth extends Logging with Serializable {
     val minCount = minSuport * count
     logDebug("minSuppot count:" + minSuport)

-    //one times scan data db to get L1
+    // one times scan data db to get L1
     val L1 = FPGStepOne(RDD, minCount)
     logDebug("L1 length:" + L1.length)
     logDebug("L1:" + L1)

-    //two times scan data db to get Ln
+    // two times scan data db to get Ln
     val Ln = FPGStepTwo(sc, RDD, minCount, L1)
-    //add L1 and Ln to get fim
-    val fim = L1 ++ Ln
-
-    return fim
-
+    // add L1 and Ln to get fim, and return it
+    L1 ++ Ln
   }

   /**
@@ -90,10 +89,8 @@ class FPGrowth extends Logging with Serializable {
       RDD: RDD[Array[String]],
       minCount: Double,
       L1: Array[(String, Int)]): Array[(String, Int)] = {
-    //broadcast L1
+    // broadcast L1
     val bdL1 = sc.broadcast(L1)
-    //val bdL1List = bdL1.value
-
     RDD.flatMap(line => L12LineMap(line, bdL1))
       .groupByKey()
       .flatMap(line => FPTree(line, minCount))
@@ -117,16 +114,13 @@ class FPGrowth extends Logging with Serializable {
     // broadcast value
     val bdL1List = bdL1.value
     // the result variable
-    var lineArrayBuffer = collection.mutable.ArrayBuffer[(String, Int)]()
+    var lineArrayBuffer = ArrayBuffer[(String, Int)]()

     for (item <- line) {
-
       val opt = bdL1List.find(_._1.equals(item))
-
       if (opt != None) {
         lineArrayBuffer ++= opt
       }
-
     }

     // sort array
@@ -135,32 +129,28 @@ class FPGrowth extends Logging with Serializable {
       .sortWith(_._2 > _._2)
       .toArray

-
-    var arrArrayBuffer = collection.mutable.ArrayBuffer[(String, Array[String])]()
+    var arrArrayBuffer = ArrayBuffer[(String, Array[String])]()

     /**
      * give (a,4) (b 3),(c,3),after
      * b,((a,4)
      * c,((a,4) (b 3))
      */
-    var arrBuffer = collection.mutable.ArrayBuffer[String]()
+    var arrBuffer = ArrayBuffer[String]()
     for (item <- lineArray) {
       val arr = lineArray.take(lineArray.indexOf(item))

       arrBuffer.clear()

       if (arr.length > 0) {
         for (tempArr <- arr) {
-          //remain key
           arrBuffer += tempArr._1
         }
         arrArrayBuffer += ((item._1, arrBuffer.toArray))
       }

     }
-
-    return arrArrayBuffer.toArray
-
+    arrArrayBuffer.toArray
   }

   /**
@@ -175,8 +165,8 @@ class FPGrowth extends Logging with Serializable {
     // the set of construction CPFTree
     val value = line._2

-    val _lineBuffer = collection.mutable.ArrayBuffer[(String, Int)]()
-    val map = scala.collection.mutable.Map[String, Int]()
+    val resultBuffer = ArrayBuffer[(String, Int)]()
+    val map = Map[String, Int]()
     // tree step
     var k = 1
     // loop the data set while k>0
@@ -215,7 +205,7 @@ class FPGrowth extends Logging with Serializable {

     if (lineTemp.size != 0) {
       line = lineTemp.toArray.array
-      _lineBuffer ++= line
+      resultBuffer ++= line
     }

   }
@@ -229,9 +219,7 @@
       }

     }
-
-    return _lineBuffer.toArray
-
+    resultBuffer.toArray
   }

 }
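The driver edited above makes two passes over the data: FPGStepOne collects the frequent single items L1, and FPGStepTwo rescans the transactions, where L12LineMap keeps only items present in the broadcast L1 and sorts them by descending support before the FPTree step; the final result is simply L1 ++ Ln. A self-contained sketch of that per-transaction projection (a reconstruction of the idea, not the file's exact code):

import scala.collection.mutable.ArrayBuffer

// keep only globally frequent items, ordered by descending support,
// which is the canonical ordering a conditional FP-tree is grown from
def projectLine(line: Array[String], l1: Array[(String, Int)]): Array[(String, Int)] = {
  val buf = ArrayBuffer[(String, Int)]()
  for (item <- line; hit <- l1.find(_._1 == item)) {
    buf += hit
  }
  buf.sortWith(_._2 > _._2).toArray
}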
