Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
final class OneVsRestModel private[ml] (
override val uid: String,
labelMetadata: Metadata,
val models: Array[_ <: ClassificationModel[_,_]])
val models: Array[_ <: ClassificationModel[_, _]])
extends Model[OneVsRestModel] with OneVsRestParams {

override def transformSchema(schema: StructType): StructType = {
Expand Down Expand Up @@ -104,17 +104,17 @@ final class OneVsRestModel private[ml] (

// add temporary column to store intermediate scores and update
val tmpColName = "mbc$tmp" + UUID.randomUUID().toString
val update: (Map[Int, Double], Vector) => Map[Int, Double] =
val update: (Map[Int, Double], Vector) => Map[Int, Double] =
(predictions: Map[Int, Double], prediction: Vector) => {
predictions + ((index, prediction(1)))
}
val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol))
val transformedDataset = model.transform(df).select(columns:_*)
val transformedDataset = model.transform(df).select(columns : _*)
val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf)
val newColumns = origCols ++ List(col(tmpColName))

// switch out the intermediate column with the accumulator column
updatedDataset.select(newColumns:_*).withColumnRenamed(tmpColName, accColName)
updatedDataset.select(newColumns : _*).withColumnRenamed(tmpColName, accColName)
}

if (handlePersistence) {
Expand Down Expand Up @@ -190,7 +190,7 @@ final class OneVsRest(override val uid: String)
val trainingDataset = multiclassLabeled.withColumn(labelColName, labelUDFWithNewMeta)
val classifier = getClassifier
classifier.fit(trainingDataset, classifier.labelCol -> labelColName)
}.toArray[ClassificationModel[_,_]]
}.toArray[ClassificationModel[_, _]]

if (handlePersistence) {
multiclassLabeled.unpersist()
Expand Down
4 changes: 2 additions & 2 deletions mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ final class InternalNode private[ml] (

override private[tree] def subtreeToString(indentFactor: Int = 0): String = {
val prefix: String = " " * indentFactor
prefix + s"If (${InternalNode.splitToString(split, left=true)})\n" +
prefix + s"If (${InternalNode.splitToString(split, left = true)})\n" +
leftChild.subtreeToString(indentFactor + 1) +
prefix + s"Else (${InternalNode.splitToString(split, left=false)})\n" +
prefix + s"Else (${InternalNode.splitToString(split, left = false)})\n" +
rightChild.subtreeToString(indentFactor + 1)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ private[python] class PythonMLLibAPI extends Serializable {
data: JavaRDD[Vector],
wt: Vector,
mu: Array[Object],
si: Array[Object]): RDD[Vector] = {
si: Array[Object]): RDD[Vector] = {

val weight = wt.toArray
val mean = mu.map(_.asInstanceOf[DenseVector])
Expand Down Expand Up @@ -428,7 +428,7 @@ private[python] class PythonMLLibAPI extends Serializable {

if (seed != null) als.setSeed(seed)

val model = als.run(ratingsJRDD.rdd)
val model = als.run(ratingsJRDD.rdd)
new MatrixFactorizationModelWrapper(model)
}

Expand Down Expand Up @@ -459,7 +459,7 @@ private[python] class PythonMLLibAPI extends Serializable {

if (seed != null) als.setSeed(seed)

val model = als.run(ratingsJRDD.rdd)
val model = als.run(ratingsJRDD.rdd)
new MatrixFactorizationModelWrapper(model)
}

Expand Down Expand Up @@ -1242,7 +1242,7 @@ private[spark] object SerDe extends Serializable {
}

/* convert RDD[Tuple2[,]] to RDD[Array[Any]] */
def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = {
def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = {
rdd.map(x => Array(x._1, x._2))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ class GaussianMixture private (
private object ExpectationSum {
def zero(k: Int, d: Int): ExpectationSum = {
new ExpectationSum(0.0, Array.fill(k)(0.0),
Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d,d)))
Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d, d)))
}

// compute cluster contributions for each input point
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
* Default: 1024, following the original Online LDA paper.
*/
def setTau0(tau0: Double): this.type = {
require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
this.tau0 = tau0
this
}
Expand Down Expand Up @@ -339,7 +339,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {

override private[clustering] def initialize(
docs: RDD[(Long, Vector)],
lda: LDA): OnlineLDAOptimizer = {
lda: LDA): OnlineLDAOptimizer = {
this.k = lda.getK
this.corpusSize = docs.count()
this.vocabSize = docs.first()._2.size
Expand Down Expand Up @@ -458,7 +458,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
* uses digamma which is accurate but expensive.
*/
private def dirichletExpectation(alpha: BDM[Double]): BDM[Double] = {
val rowSum = sum(alpha(breeze.linalg.*, ::))
val rowSum = sum(alpha(breeze.linalg.*, ::))
val digAlpha = digamma(alpha)
val digRowSum = digamma(rowSum)
val result = digAlpha(::, breeze.linalg.*) - digRowSum
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ private object IDF {
* Since arrays are initialized to 0 by default,
* we just omit changing those entries.
*/
if(df(j) >= minDocFreq) {
if (df(j) >= minDocFreq) {
inv(j) = math.log((m + 1.0) / (df(j) + 1.0))
}
j += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class StandardScalerModel (

@DeveloperApi
def setWithMean(withMean: Boolean): this.type = {
require(!(withMean && this.mean == null),"cannot set withMean to true while mean is null")
require(!(withMean && this.mean == null), "cannot set withMean to true while mean is null")
this.withMean = withMean
this
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ private case class VocabWord(
var cn: Int,
var point: Array[Int],
var code: Array[Int],
var codeLen:Int
var codeLen: Int
)

/**
Expand Down Expand Up @@ -469,7 +469,7 @@ class Word2VecModel private[mllib] (
val norm1 = blas.snrm2(n, v1, 1)
val norm2 = blas.snrm2(n, v2, 1)
if (norm1 == 0 || norm2 == 0) return 0.0
blas.sdot(n, v1, 1, v2,1) / norm1 / norm2
blas.sdot(n, v1, 1, v2, 1) / norm1 / norm2
}

override protected def formatVersion = "1.0"
Expand Down Expand Up @@ -500,7 +500,7 @@ class Word2VecModel private[mllib] (
*/
def findSynonyms(word: String, num: Int): Array[(String, Double)] = {
val vector = transform(word)
findSynonyms(vector,num)
findSynonyms(vector, num)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ class RowMatrix(

val computeMode = mode match {
case "auto" =>
if(k > 5000) {
if (k > 5000) {
logWarning(s"computing svd with k=$k and n=$n, please check necessity")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ object RandomRDDs {
numPartitions: Int = 0,
seed: Long = Utils.random.nextLong()): RDD[Double] = {
val uniform = new UniformGenerator()
randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed)
randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = loadMetadata(sc, path)
val isotonic = (metadata \ "isotonic").extract[Boolean]
val isotonic = (metadata \ "isotonic").extract[Boolean]
val classNameV1_0 = SaveLoadV1_0.thisClassName
(loadedClassName, version) match {
case (className, "1.0") if className == classNameV1_0 =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
require(n == sample.size, s"Dimensions mismatch when adding new sample." +
s" Expecting $n but got ${sample.size}.")

val localCurrMean= currMean
val localCurrMean = currMean
val localCurrM2n = currM2n
val localCurrM2 = currM2
val localCurrL1 = currL1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ private[stat] object ChiSqTest extends Logging {
* Pearson's independence test on the input contingency matrix.
* TODO: optimize for SparseMatrix when it becomes supported.
*/
def chiSquaredMatrix(counts: Matrix, methodName:String = PEARSON.name): ChiSqTestResult = {
def chiSquaredMatrix(counts: Matrix, methodName: String = PEARSON.name): ChiSqTestResult = {
val method = methodFromString(methodName)
val numRows = counts.numRows
val numCols = counts.numCols
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ object DecisionTree extends Serializable with Logging {
numClasses: Int,
maxBins: Int,
quantileCalculationStrategy: QuantileStrategy,
categoricalFeaturesInfo: Map[Int,Int]): DecisionTreeModel = {
categoricalFeaturesInfo: Map[Int, Int]): DecisionTreeModel = {
val strategy = new Strategy(algo, impurity, maxDepth, numClasses, maxBins,
quantileCalculationStrategy, categoricalFeaturesInfo)
new DecisionTree(strategy).run(input)
Expand Down Expand Up @@ -768,7 +768,7 @@ object DecisionTree extends Serializable with Logging {
*/
private def calculatePredictImpurity(
leftImpurityCalculator: ImpurityCalculator,
rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = {
rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = {
val parentNodeAgg = leftImpurityCalculator.copy
parentNodeAgg.add(rightImpurityCalculator)
val predict = calculatePredict(parentNodeAgg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = {
val algo = boostingStrategy.treeStrategy.algo
algo match {
case Regression => GradientBoostedTrees.boost(input, input, boostingStrategy, validate=false)
case Regression =>
GradientBoostedTrees.boost(input, input, boostingStrategy, validate = false)
case Classification =>
// Map labels to -1, +1 so binary classification can be treated as regression.
val remappedInput = input.map(x => new LabeledPoint((x.label * 2) - 1, x.features))
GradientBoostedTrees.boost(remappedInput,
remappedInput, boostingStrategy, validate=false)
GradientBoostedTrees.boost(remappedInput, remappedInput, boostingStrategy, validate = false)
case _ =>
throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.")
}
Expand Down Expand Up @@ -93,16 +93,16 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = {
val algo = boostingStrategy.treeStrategy.algo
algo match {
case Regression => GradientBoostedTrees.boost(
input, validationInput, boostingStrategy, validate=true)
case Regression =>
GradientBoostedTrees.boost(input, validationInput, boostingStrategy, validate = true)
case Classification =>
// Map labels to -1, +1 so binary classification can be treated as regression.
val remappedInput = input.map(
x => new LabeledPoint((x.label * 2) - 1, x.features))
val remappedValidationInput = validationInput.map(
x => new LabeledPoint((x.label * 2) - 1, x.features))
GradientBoostedTrees.boost(remappedInput, remappedValidationInput, boostingStrategy,
validate=true)
validate = true)
case _ =>
throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ private class RandomForest (
try {
nodeIdCache.get.deleteAllCheckpoints()
} catch {
case e:IOException =>
case e: IOException =>
logWarning(s"delete all checkpoints failed. Error reason: ${e.getMessage}")
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,19 +151,19 @@ class Node (
s"(feature ${split.feature} > ${split.threshold})"
}
case Categorical => if (left) {
s"(feature ${split.feature} in ${split.categories.mkString("{",",","}")})"
s"(feature ${split.feature} in ${split.categories.mkString("{", ",", "}")})"
} else {
s"(feature ${split.feature} not in ${split.categories.mkString("{",",","}")})"
s"(feature ${split.feature} not in ${split.categories.mkString("{", ",", "}")})"
}
}
}
val prefix: String = " " * indentFactor
if (isLeaf) {
prefix + s"Predict: ${predict.predict}\n"
} else {
prefix + s"If ${splitToString(split.get, left=true)}\n" +
prefix + s"If ${splitToString(split.get, left = true)}\n" +
leftNode.get.subtreeToString(indentFactor + 1) +
prefix + s"Else ${splitToString(split.get, left=false)}\n" +
prefix + s"Else ${splitToString(split.get, left = false)}\n" +
rightNode.get.subtreeToString(indentFactor + 1)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ object MFDataGenerator {
BLAS.gemm(z, A, B, 1.0, fullData)

val df = rank * (m + n - rank)
val sampSize = scala.math.min(scala.math.round(trainSampFact * df),
scala.math.round(.99 * m * n)).toInt
val sampSize = math.min(math.round(trainSampFact * df), math.round(.99 * m * n)).toInt
val rand = new Random()
val mn = m * n
val shuffled = rand.shuffle((0 until mn).toList)
Expand All @@ -102,8 +101,8 @@ object MFDataGenerator {

// optionally generate testing data
if (test) {
val testSampSize = scala.math
.min(scala.math.round(sampSize * testSampFact),scala.math.round(mn - sampSize)).toInt
val testSampSize = math.min(
math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
val testOrdered = testOmega.sortWith(_ < _).toArray
val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext {
val doc = sc.parallelize(Seq(sentence, sentence)).map(line => line.split(" "))

val codes = Map(
"a" -> Array(-0.2811822295188904,-0.6356269121170044,-0.3020961284637451),
"b" -> Array(1.0309048891067505,-1.29472815990448,0.22276712954044342),
"c" -> Array(-0.08456747233867645,0.5137411952018738,0.11731560528278351)
"a" -> Array(-0.2811822295188904, -0.6356269121170044, -0.3020961284637451),
"b" -> Array(1.0309048891067505, -1.29472815990448, 0.22276712954044342),
"c" -> Array(-0.08456747233867645, 0.5137411952018738, 0.11731560528278351)
)

val expected = doc.map { sentence =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,20 @@ object CrossValidatorSuite {

override def validateParams(): Unit = require($(inputCol).nonEmpty)

override def fit(dataset: DataFrame): MyModel = ???
override def fit(dataset: DataFrame): MyModel = {
throw new UnsupportedOperationException
}

override def transformSchema(schema: StructType): StructType = ???
override def transformSchema(schema: StructType): StructType = {
throw new UnsupportedOperationException
}
}

class MyEvaluator extends Evaluator {

override def evaluate(dataset: DataFrame): Double = ???
override def evaluate(dataset: DataFrame): Double = {
throw new UnsupportedOperationException
}

override val uid: String = "eval"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class PythonMLLibAPISuite extends FunSuite {

val smt = new SparseMatrix(
3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9),
isTransposed=true)
isTransposed = true)
val nsmt = SerDe.loads(SerDe.dumps(smt)).asInstanceOf[SparseMatrix]
assert(smt.toArray === nsmt.toArray)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext {
val theta = Array(
Array(0.50, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.40), // label 0
Array(0.02, 0.70, 0.10, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02), // label 1
Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2
Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2
).map(_.map(math.log))

val testData = NaiveBayesSuite.generateNaiveBayesInput(
Expand Down
Loading