Skip to content

Commit 7727cae

Browse files
committed
Merge branch 'master' into SPARK-6528
2 parents 4338a37 + bdc5c16 commit 7727cae

File tree

4 files changed

+38
-20
lines changed

4 files changed

+38
-20
lines changed

CONTRIBUTING.md

Lines changed: 13 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -1,12 +1,16 @@
11
## Contributing to Spark
22

3-
Contributions via GitHub pull requests are gladly accepted from their original
4-
author. Along with any pull requests, please state that the contribution is
5-
your original work and that you license the work to the project under the
6-
project's open source license. Whether or not you state this explicitly, by
7-
submitting any copyrighted material via pull request, email, or other means
8-
you agree to license the material under the project's open source license and
9-
warrant that you have the legal authority to do so.
3+
*Before opening a pull request*, review the
4+
[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark).
5+
It lists steps that are required before creating a PR. In particular, consider:
6+
7+
- Is the change important and ready enough to ask the community to spend time reviewing?
8+
- Have you searched for existing, related JIRAs and pull requests?
9+
- Is this a new feature that can stand alone as a package on http://spark-packages.org ?
10+
- Is the change being proposed clearly explained and motivated?
1011

11-
Please see the [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
12-
for more information.
12+
When you contribute code, you affirm that the contribution is your original work and that you
13+
license the work to the project under the project's open source license. Whether or not you
14+
state this explicitly, by submitting any copyrighted material via pull request, email, or
15+
other means you agree to license the material under the project's open source license and
16+
warrant that you have the legal authority to do so.

examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala

Lines changed: 19 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -44,6 +44,13 @@ import org.apache.spark.sql.{SQLContext, DataFrame}
4444
* {{{
4545
* ./bin/run-example ml.DecisionTreeExample [options]
4646
* }}}
47+
* Note that Decision Trees can take a large amount of memory. If the run-example command above
48+
* fails, try running via spark-submit and specifying the amount of memory as at least 1g.
49+
* For local mode, run
50+
* {{{
51+
* ./bin/spark-submit --class org.apache.spark.examples.ml.DecisionTreeExample --driver-memory 1g
52+
* [examples JAR path] [options]
53+
* }}}
4754
* If you use it as a template to create your own app, please use `spark-submit` to submit your app.
4855
*/
4956
object DecisionTreeExample {
@@ -70,7 +77,7 @@ object DecisionTreeExample {
7077
val parser = new OptionParser[Params]("DecisionTreeExample") {
7178
head("DecisionTreeExample: an example decision tree app.")
7279
opt[String]("algo")
73-
.text(s"algorithm (Classification, Regression), default: ${defaultParams.algo}")
80+
.text(s"algorithm (classification, regression), default: ${defaultParams.algo}")
7481
.action((x, c) => c.copy(algo = x))
7582
opt[Int]("maxDepth")
7683
.text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
@@ -222,18 +229,23 @@ object DecisionTreeExample {
222229
// (1) For classification, re-index classes.
223230
val labelColName = if (algo == "classification") "indexedLabel" else "label"
224231
if (algo == "classification") {
225-
val labelIndexer = new StringIndexer().setInputCol("labelString").setOutputCol(labelColName)
232+
val labelIndexer = new StringIndexer()
233+
.setInputCol("labelString")
234+
.setOutputCol(labelColName)
226235
stages += labelIndexer
227236
}
228237
// (2) Identify categorical features using VectorIndexer.
229238
// Features with more than maxCategories values will be treated as continuous.
230-
val featuresIndexer = new VectorIndexer().setInputCol("features")
231-
.setOutputCol("indexedFeatures").setMaxCategories(10)
239+
val featuresIndexer = new VectorIndexer()
240+
.setInputCol("features")
241+
.setOutputCol("indexedFeatures")
242+
.setMaxCategories(10)
232243
stages += featuresIndexer
233244
// (3) Learn DecisionTree
234245
val dt = algo match {
235246
case "classification" =>
236-
new DecisionTreeClassifier().setFeaturesCol("indexedFeatures")
247+
new DecisionTreeClassifier()
248+
.setFeaturesCol("indexedFeatures")
237249
.setLabelCol(labelColName)
238250
.setMaxDepth(params.maxDepth)
239251
.setMaxBins(params.maxBins)
@@ -242,7 +254,8 @@ object DecisionTreeExample {
242254
.setCacheNodeIds(params.cacheNodeIds)
243255
.setCheckpointInterval(params.checkpointInterval)
244256
case "regression" =>
245-
new DecisionTreeRegressor().setFeaturesCol("indexedFeatures")
257+
new DecisionTreeRegressor()
258+
.setFeaturesCol("indexedFeatures")
246259
.setLabelCol(labelColName)
247260
.setMaxDepth(params.maxDepth)
248261
.setMaxBins(params.maxBins)

mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -117,7 +117,7 @@ private[ml] trait DecisionTreeParams extends PredictorParams {
117117
def setMaxDepth(value: Int): this.type = {
118118
require(value >= 0, s"maxDepth parameter must be >= 0. Given bad value: $value")
119119
set(maxDepth, value)
120-
this.asInstanceOf[this.type]
120+
this
121121
}
122122

123123
/** @group getParam */
@@ -283,7 +283,7 @@ private[ml] trait TreeRegressorParams extends Params {
283283
def getImpurity: String = getOrDefault(impurity)
284284

285285
/** Convert new impurity to old impurity. */
286-
protected def getOldImpurity: OldImpurity = {
286+
private[ml] def getOldImpurity: OldImpurity = {
287287
getImpurity match {
288288
case "variance" => OldVariance
289289
case _ =>

mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala

Lines changed: 4 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -38,7 +38,7 @@ sealed trait Split extends Serializable {
3838
private[tree] def toOld: OldSplit
3939
}
4040

41-
private[ml] object Split {
41+
private[tree] object Split {
4242

4343
def fromOld(oldSplit: OldSplit, categoricalFeatures: Map[Int, Int]): Split = {
4444
oldSplit.featureType match {
@@ -58,7 +58,7 @@ private[ml] object Split {
5858
* left. Otherwise, it goes right.
5959
* @param numCategories Number of categories for this feature.
6060
*/
61-
final class CategoricalSplit(
61+
final class CategoricalSplit private[ml] (
6262
override val featureIndex: Int,
6363
leftCategories: Array[Double],
6464
private val numCategories: Int)
@@ -130,7 +130,8 @@ final class CategoricalSplit(
130130
* @param threshold If the feature value is <= this threshold, then the split goes left.
131131
* Otherwise, it goes right.
132132
*/
133-
final class ContinuousSplit(override val featureIndex: Int, val threshold: Double) extends Split {
133+
final class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double)
134+
extends Split {
134135

135136
override private[ml] def shouldGoLeft(features: Vector): Boolean = {
136137
features(featureIndex) <= threshold

0 commit comments

Comments (0)