From 23d12d1c8b52220242369d01bc60e3157efd347f Mon Sep 17 00:00:00 2001 From: Robert Dodier Date: Fri, 13 Mar 2015 10:58:04 -0700 Subject: [PATCH 1/7] Initial, incomplete work towards calibration for binary classifiers. o ProbabilisticClassifier.scala: mention calibration in comments o BinaryClassificationMetrics.scala: adapting code for ROC to calibration; incomplete and commented out for now o BinaryClassificationMetricsSuite.scala: tests for calibration --- .../ProbabilisticClassifier.scala | 2 + .../BinaryClassificationMetrics.scala | 102 ++++++++++++++++-- .../BinaryClassificationMetricsSuite.scala | 31 +++++- 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index fdd1851ae550..071d05e1b1de 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -177,6 +177,8 @@ abstract class ProbabilisticClassificationModel[ * Predict the probability of each class given the features. * These predictions are also called class conditional probabilities. * + * See BinaryClassificationMetrics.calibration to assess calibration. + * * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. * * @return Estimated class conditional probabilities diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index 508fe532b130..dcdf0477cad6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -29,14 +29,14 @@ import org.apache.spark.sql.DataFrame * Evaluator for binary classification. * * @param scoreAndLabels an RDD of (score, label) pairs. - * @param numBins if greater than 0, then the curves (ROC curve, PR curve) computed internally - * will be down-sampled to this many "bins". If 0, no down-sampling will occur. - * This is useful because the curve contains a point for each distinct score - * in the input, and this could be as large as the input itself -- millions of - * points or more, when thousands may be entirely sufficient to summarize - * the curve. After down-sampling, the curves will instead be made of approximately - * `numBins` points instead. Points are made from bins of equal numbers of - * consecutive points. The size of each bin is + * @param numBins if greater than 0, then the curves (ROC curve, PR curve, calibration curve) + * computed internally will be down-sampled to this many "bins". If 0, no + * down-sampling will occur. This is useful because the curve contains a point for + * each distinct score in the input, and this could be as large as the input itself + * -- millions of points or more, when thousands may be entirely sufficient to + * summarize the curve. After down-sampling, the curves will instead be made of + * approximately `numBins` points instead. Points are made from bins of equal + * numbers of consecutive points. The size of each bin is * `floor(scoreAndLabels.count() / numBins)`, which means the resulting number * of bins may not exactly equal numBins. The last bin in each partition may * be smaller as a result, meaning there may be an extra sample at @@ -226,4 +226,90 @@ class BinaryClassificationMetrics @Since("1.3.0") ( (x(c), y(c)) } } + + /** + * Returns the calibration or reliability curve, + * which is an RDD of (average score in bin, fraction of positive examples in bin). + * @see http://en.wikipedia.org/wiki/Calibration_%28statistics%29#In_classification + * + * References: + * + * Mahdi Pakdaman Naeini, Gregory F. Cooper, Milos Hauskrecht. + * Binary Classifier Calibration: Non-parametric approach. + * http://arxiv.org/abs/1401.3390 + * + * Alexandru Niculescu-Mizil, Rich Caruana. + * Predicting Good Probabilities With Supervised Learning. + * Appearing in Proceedings of the 22nd International Conference on Machine Learning, + * Bonn, Germany, 2005. + * http://www.cs.cornell.edu/~alexn/papers/calibration.icml05.crc.rev3.pdf + * + * Properties and benefits of calibrated classifiers. + * Ira Cohen, Moises Goldszmidt. + * http://www.hpl.hp.com/techreports/2004/HPL-2004-22R1.pdf + */ + def calibration(): RDD[((Double, Double), (Double, Int))] = { + val calibrationCurve = assessedCalibration + val sc = confusions.context + sc.makeRDD(calibrationCurve, 1) + } + + private lazy val assessedCalibration: Seq[((Double, Double), (Double, Int))] = { +// val distinctScoreAndLabels = scoreAndLabels.combineByKey( +// createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label, +// mergeValue = (c: BinaryLabelCounter, label: Double) => c += label, +// mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2 +// ).sortByKey(ascending = true) +// +// val binnedCounts = +// if (numBins == 0) { +// counts +// } else { +// val countsSize = counts.count() +// +// var grouping = countsSize / numBins +// if (grouping < 2) { +// logInfo(s"Curve is too small ($countsSize) for $numBins bins to be useful") +// counts +// } else { +// if (grouping >= Int.MaxValue) { +// logWarning( +// s"Curve too large ($countsSize) for $numBins bins; capping at ${Int.MaxValue}") +// grouping = Int.MaxValue +// } +// counts.mapPartitions(_.grouped(grouping.toInt).map { pairs => +// // The score of the combined point will be just the first one's score +// // I THINK WE WANT THE AVERAGE OF SCORE OVER THE BIN HERE +// val firstScore = pairs.head._1 +// // The point will contain all counts in this chunk +// val agg = new BinaryLabelCounter() +// pairs.foreach(pair => agg += pair._2) +// (firstScore, agg) +// }) +// } +// } +// +// val agg = binnedCounts.values.mapPartitions { iter => +// val agg = new BinaryLabelCounter() +// iter.foreach(agg += _) +// Iterator(agg) +// }.collect() +// val partitionwiseCumulativeCounts = +// agg.scanLeft(new BinaryLabelCounter())( +// (agg: BinaryLabelCounter, c: BinaryLabelCounter) => agg.clone() += c) +// val totalCount = partitionwiseCumulativeCounts.last +// logInfo(s"Total counts: $totalCount") +// // WE WANT PER-BIN COUNTS HERE, NOT CUMULATIVE +// val correctCounts = binnedCounts.mapPartitionsWithIndex( +// (index: Int, iter: Iterator[(Double, BinaryLabelCounter)]) => { +// val cumCount = partitionwiseCumulativeCounts(index) +// iter.map { case (score, c) => +// cumCount += c +// (score, cumCount.clone()) +// } +// }, preservesPartitioning = true) +// correctCounts.persist() +// correctCounts + Seq(((0.0, 0.0), (0.0, 0)), ((1.0, 1.0), (1.0, 0))) + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala index 99d52fabc530..bf40b143cc20 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala @@ -28,6 +28,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark private def pairsWithinEpsilon(x: ((Double, Double), (Double, Double))): Boolean = (x._1._1 ~= x._2._1 absTol 1E-5) && (x._1._2 ~= x._2._2 absTol 1E-5) + private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Int)), ((Double, Double), (Double, Int)))): Boolean = + (x._1._1._1 ~= x._2._1._1 absTol 1E-5) && (x._1._1._2 ~= x._2._1._2 absTol 1E-5) && + (x._1._2._1 ~= x._2._2._1 absTol 1E-5) && x._1._2._2 == x._2._2._2 + private def assertSequencesMatch(left: Seq[Double], right: Seq[Double]): Unit = { assert(left.zip(right).forall(areWithinEpsilon)) } @@ -37,6 +41,11 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark assert(left.zip(right).forall(pairsWithinEpsilon)) } + private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Int))], + right: Seq[((Double, Double), (Double, Int))]): Unit = { + assert(left.zip(right).forall(pairPairsWithinEpsilon)) + } + private def validateMetrics(metrics: BinaryClassificationMetrics, expectedThresholds: Seq[Double], expectedROCCurve: Seq[(Double, Double)], @@ -44,7 +53,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark expectedFMeasures1: Seq[Double], expectedFmeasures2: Seq[Double], expectedPrecisions: Seq[Double], - expectedRecalls: Seq[Double]) = { + expectedRecalls: Seq[Double], + expectedCalibration: Seq[((Double, Double), (Double, Int))]) = { assertSequencesMatch(metrics.thresholds().collect(), expectedThresholds) assertTupleSequencesMatch(metrics.roc().collect(), expectedROCCurve) @@ -59,6 +69,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark expectedThresholds.zip(expectedPrecisions)) assertTupleSequencesMatch(metrics.recallByThreshold().collect(), expectedThresholds.zip(expectedRecalls)) + assertTupleTupleSequencesMatch(metrics.calibration().collect(), expectedCalibration) } test("binary evaluation metrics") { @@ -80,8 +91,11 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark val prCurve = Seq((0.0, 1.0)) ++ pr val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)} val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)} + val calibration = Seq(((0.1, 0.1), (0.5, 2)), ((0.4, 0.4), (0.0, 1)), ((0.6, 0.6), (2/3.0, 3)), + ((0.8, 0.8), (1.0, 1))) - validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls) + validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, + calibration) } test("binary evaluation metrics for RDD where all examples have positive label") { @@ -97,8 +111,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark val prCurve = Seq((0.0, 1.0)) ++ pr val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)} val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)} + val calibration = Seq(((0.5, 0.5), (1.0, 2))) - validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls) + validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, + calibration) } test("binary evaluation metrics for RDD where all examples have negative label") { @@ -121,7 +137,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark case (r, p) => 5.0 * (p * r) / (4.0 * p + r) } - validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls) + val calibration = Seq(((0.5, 0.5), (0.0, 2))) + + validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, + calibration) } test("binary evaluation metrics with downsampling") { @@ -157,6 +176,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark (1.0, 1.0), (1.0, 1.0) ) == downsampledROC) + + val calibration = Seq(((0.1, 0.2), (0.0, 2)), ((0.3, 0.4), (0.5, 2)), ((0.5, 0.6), (0.5, 2)), + ((0.7, 0.9), (2/3.0, 3))) + assert(calibration == downsampled.calibration().collect()) } } From 1df8619c2112f3c4cc1554c86b901599abebf11f Mon Sep 17 00:00:00 2001 From: Robert Dodier Date: Fri, 13 Mar 2015 14:08:14 -0700 Subject: [PATCH 2/7] Initial attempt to implement calibration; compiles, not tested yet. --- .../BinaryClassificationMetrics.scala | 101 +++++++----------- 1 file changed, 40 insertions(+), 61 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index dcdf0477cad6..eaf03a3bf8b5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -248,68 +248,47 @@ class BinaryClassificationMetrics @Since("1.3.0") ( * Ira Cohen, Moises Goldszmidt. * http://www.hpl.hp.com/techreports/2004/HPL-2004-22R1.pdf */ - def calibration(): RDD[((Double, Double), (Double, Int))] = { - val calibrationCurve = assessedCalibration - val sc = confusions.context - sc.makeRDD(calibrationCurve, 1) + def calibration(): RDD[((Double, Double), (Double, Long))] = { + assessedCalibration } - private lazy val assessedCalibration: Seq[((Double, Double), (Double, Int))] = { -// val distinctScoreAndLabels = scoreAndLabels.combineByKey( -// createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label, -// mergeValue = (c: BinaryLabelCounter, label: Double) => c += label, -// mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2 -// ).sortByKey(ascending = true) -// -// val binnedCounts = -// if (numBins == 0) { -// counts -// } else { -// val countsSize = counts.count() -// -// var grouping = countsSize / numBins -// if (grouping < 2) { -// logInfo(s"Curve is too small ($countsSize) for $numBins bins to be useful") -// counts -// } else { -// if (grouping >= Int.MaxValue) { -// logWarning( -// s"Curve too large ($countsSize) for $numBins bins; capping at ${Int.MaxValue}") -// grouping = Int.MaxValue -// } -// counts.mapPartitions(_.grouped(grouping.toInt).map { pairs => -// // The score of the combined point will be just the first one's score -// // I THINK WE WANT THE AVERAGE OF SCORE OVER THE BIN HERE -// val firstScore = pairs.head._1 -// // The point will contain all counts in this chunk -// val agg = new BinaryLabelCounter() -// pairs.foreach(pair => agg += pair._2) -// (firstScore, agg) -// }) -// } -// } -// -// val agg = binnedCounts.values.mapPartitions { iter => -// val agg = new BinaryLabelCounter() -// iter.foreach(agg += _) -// Iterator(agg) -// }.collect() -// val partitionwiseCumulativeCounts = -// agg.scanLeft(new BinaryLabelCounter())( -// (agg: BinaryLabelCounter, c: BinaryLabelCounter) => agg.clone() += c) -// val totalCount = partitionwiseCumulativeCounts.last -// logInfo(s"Total counts: $totalCount") -// // WE WANT PER-BIN COUNTS HERE, NOT CUMULATIVE -// val correctCounts = binnedCounts.mapPartitionsWithIndex( -// (index: Int, iter: Iterator[(Double, BinaryLabelCounter)]) => { -// val cumCount = partitionwiseCumulativeCounts(index) -// iter.map { case (score, c) => -// cumCount += c -// (score, cumCount.clone()) -// } -// }, preservesPartitioning = true) -// correctCounts.persist() -// correctCounts - Seq(((0.0, 0.0), (0.0, 0)), ((1.0, 1.0), (1.0, 0))) + private lazy val assessedCalibration: RDD[((Double, Double), (Double, Long))] = { + val distinctScoresAndLabelCounts = scoreAndLabels.combineByKey( + createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label, + mergeValue = (c: BinaryLabelCounter, label: Double) => c += label, + mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2 + ).sortByKey(ascending = true) + + val binnedDistinctScoresAndLabelCounts = + if (numBins == 0) { + distinctScoresAndLabelCounts.map { pair => ((pair._1, pair._1), pair._2) } + } else { + val distinctScoresCount = distinctScoresAndLabelCounts.count() + + var groupCount = distinctScoresCount / numBins + if (groupCount < 2) { + logInfo(s"Too few distinct scores ($distinctScoresCount) for $numBins bins to be useful") + distinctScoresAndLabelCounts.map { pair => ((pair._1, pair._1), pair._2) } + } else { + if (groupCount >= Int.MaxValue) { + val n = distinctScoresCount + logWarning( + s"Too many distinct scores ($n) for $numBins bins; capping at ${Int.MaxValue}") + groupCount = Int.MaxValue + } + distinctScoresAndLabelCounts.mapPartitions(_.grouped(groupCount.toInt).map { pairs => + val firstScore = pairs.head._1 + val lastScore = pairs.last._1 + val agg = new BinaryLabelCounter() + pairs.foreach(pair => agg += pair._2) + ((firstScore, lastScore), agg) + }) + } + } + + binnedDistinctScoresAndLabelCounts.map { pair => + val n = pair._2.numPositives + pair._2.numNegatives + (pair._1, (pair._2.numPositives / n.toDouble, n)) + } } } From 0769ee6f69017a390c861cce5f3655bb79f03d40 Mon Sep 17 00:00:00 2001 From: Robert Dodier Date: Fri, 13 Mar 2015 14:26:32 -0700 Subject: [PATCH 3/7] Change (..., (Double, Int)) to (..., (Double, Long)) to match types to what calibration actually returns. --- .../BinaryClassificationMetricsSuite.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala index bf40b143cc20..9d330973460b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala @@ -28,7 +28,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark private def pairsWithinEpsilon(x: ((Double, Double), (Double, Double))): Boolean = (x._1._1 ~= x._2._1 absTol 1E-5) && (x._1._2 ~= x._2._2 absTol 1E-5) - private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Int)), ((Double, Double), (Double, Int)))): Boolean = + private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Long)), ((Double, Double), (Double, Long)))): Boolean = (x._1._1._1 ~= x._2._1._1 absTol 1E-5) && (x._1._1._2 ~= x._2._1._2 absTol 1E-5) && (x._1._2._1 ~= x._2._2._1 absTol 1E-5) && x._1._2._2 == x._2._2._2 @@ -41,8 +41,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark assert(left.zip(right).forall(pairsWithinEpsilon)) } - private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Int))], - right: Seq[((Double, Double), (Double, Int))]): Unit = { + private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Long))], + right: Seq[((Double, Double), (Double, Long))]): Unit = { assert(left.zip(right).forall(pairPairsWithinEpsilon)) } @@ -54,7 +54,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark expectedFmeasures2: Seq[Double], expectedPrecisions: Seq[Double], expectedRecalls: Seq[Double], - expectedCalibration: Seq[((Double, Double), (Double, Int))]) = { + expectedCalibration: Seq[((Double, Double), (Double, Long))]) = { assertSequencesMatch(metrics.thresholds().collect(), expectedThresholds) assertTupleSequencesMatch(metrics.roc().collect(), expectedROCCurve) @@ -91,8 +91,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark val prCurve = Seq((0.0, 1.0)) ++ pr val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)} val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)} - val calibration = Seq(((0.1, 0.1), (0.5, 2)), ((0.4, 0.4), (0.0, 1)), ((0.6, 0.6), (2/3.0, 3)), - ((0.8, 0.8), (1.0, 1))) + val calibration = Seq(((0.1, 0.1), (0.5, 2L)), ((0.4, 0.4), (0.0, 1L)), ((0.6, 0.6), (2/3.0, 3L)), + ((0.8, 0.8), (1.0, 1L))) validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, calibration) @@ -111,7 +111,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark val prCurve = Seq((0.0, 1.0)) ++ pr val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)} val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)} - val calibration = Seq(((0.5, 0.5), (1.0, 2))) + val calibration = Seq(((0.5, 0.5), (1.0, 2L))) validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, calibration) @@ -137,7 +137,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark case (r, p) => 5.0 * (p * r) / (4.0 * p + r) } - val calibration = Seq(((0.5, 0.5), (0.0, 2))) + val calibration = Seq(((0.5, 0.5), (0.0, 2L))) validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls, calibration) @@ -177,8 +177,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark ) == downsampledROC) - val calibration = Seq(((0.1, 0.2), (0.0, 2)), ((0.3, 0.4), (0.5, 2)), ((0.5, 0.6), (0.5, 2)), - ((0.7, 0.9), (2/3.0, 3))) + val calibration = Seq(((0.1, 0.2), (0.0, 2L)), ((0.3, 0.4), (0.5, 2L)), ((0.5, 0.6), (0.5, 2L)), + ((0.7, 0.9), (2/3.0, 3L))) assert(calibration == downsampled.calibration().collect()) } From bf682c0fd0ae22d43b6bf752416b9a599e21e70b Mon Sep 17 00:00:00 2001 From: Robert Dodier Date: Fri, 13 Mar 2015 17:48:56 -0700 Subject: [PATCH 4/7] Adjust JVM command line to get tests to run. --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index d04ed1e79865..f016ab674909 100644 --- a/pom.xml +++ b/pom.xml @@ -1889,7 +1889,7 @@ **/*Suite.java ${project.build.directory}/surefire-reports - -Xmx3g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m + -Xmx1024M -XX:MaxPermSize=1024M -Xss4M