Skip to content

Commit acffc43

Browse files
Lewuathe authored and mengxr committed
[SPARK-6262][MLLIB] Implement missing methods for MultivariateStatisticalSummary
Add the following methods to PySpark's MultivariateStatisticalSummary:
- normL1
- normL2

Author: lewuathe <[email protected]>

Closes apache#5359 from Lewuathe/SPARK-6262 and squashes the following commits:

cbe439e [lewuathe] Implement missing methods for MultivariateStatisticalSummary
1 parent f15806a commit acffc43

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

python/pyspark/mllib/stat/_statistics.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,12 @@ def max(self):
4949
def min(self):
5050
return self.call("min").toArray()
5151

52+
def normL1(self):
53+
return self.call("normL1").toArray()
54+
55+
def normL2(self):
56+
return self.call("normL2").toArray()
57+
5258

5359
class Statistics(object):
5460

python/pyspark/mllib/tests.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,12 @@ def test_col_with_different_rdds(self):
357357
summary = Statistics.colStats(data)
358358
self.assertEqual(10, summary.count())
359359

360+
def test_col_norms(self):
361+
data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
362+
summary = Statistics.colStats(data)
363+
self.assertEqual(10, len(summary.normL1()))
364+
self.assertEqual(10, len(summary.normL2()))
365+
360366

361367
class VectorUDTTests(PySparkTestCase):
362368

0 commit comments

Comments
 (0)