From 2ebdd3cf09de2833f552c708108c40772a2f65d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Fri, 13 Nov 2015 19:44:51 -0500 Subject: [PATCH 1/4] SPARK-11741 Process doctests using TextTestRunner/XMLTestRunner --- python/pyspark/accumulators.py | 164 +++++++++++---------- python/pyspark/broadcast.py | 21 ++- python/pyspark/conf.py | 99 +++++++------ python/pyspark/context.py | 21 ++- python/pyspark/ml/classification.py | 24 ++- python/pyspark/ml/clustering.py | 25 +++- python/pyspark/ml/evaluation.py | 23 ++- python/pyspark/ml/feature.py | 21 ++- python/pyspark/ml/recommendation.py | 25 +++- python/pyspark/ml/regression.py | 24 ++- python/pyspark/ml/tuning.py | 23 ++- python/pyspark/mllib/classification.py | 22 ++- python/pyspark/mllib/clustering.py | 21 ++- python/pyspark/mllib/evaluation.py | 23 ++- python/pyspark/mllib/feature.py | 21 ++- python/pyspark/mllib/fpm.py | 26 +++- python/pyspark/mllib/linalg/__init__.py | 21 ++- python/pyspark/mllib/linalg/distributed.py | 21 ++- python/pyspark/mllib/random.py | 22 ++- python/pyspark/mllib/recommendation.py | 22 ++- python/pyspark/mllib/regression.py | 22 ++- python/pyspark/mllib/stat/KernelDensity.py | 34 ++++- python/pyspark/mllib/stat/_statistics.py | 21 ++- python/pyspark/mllib/stat/distribution.py | 33 +++++ python/pyspark/mllib/tree.py | 22 ++- python/pyspark/mllib/util.py | 21 ++- python/pyspark/profiler.py | 22 ++- python/pyspark/rdd.py | 22 ++- python/pyspark/serializers.py | 89 ++++++----- python/pyspark/shuffle.py | 22 ++- python/pyspark/sql/column.py | 25 +++- python/pyspark/sql/context.py | 24 ++- python/pyspark/sql/dataframe.py | 25 +++- python/pyspark/sql/functions.py | 24 ++- python/pyspark/sql/group.py | 27 +++- python/pyspark/sql/readwriter.py | 25 +++- python/pyspark/sql/types.py | 21 ++- python/pyspark/sql/window.py | 24 ++- python/pyspark/statcounter.py | 25 ++++ python/pyspark/streaming/util.py | 22 ++- 40 files changed, 977 insertions(+), 242 deletions(-) diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 6ef8cf53cc74..884295a2d350 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -15,77 +15,6 @@ # limitations under the License. # -""" ->>> from pyspark.context import SparkContext ->>> sc = SparkContext('local', 'test') ->>> a = sc.accumulator(1) ->>> a.value -1 ->>> a.value = 2 ->>> a.value -2 ->>> a += 5 ->>> a.value -7 - ->>> sc.accumulator(1.0).value -1.0 - ->>> sc.accumulator(1j).value -1j - ->>> rdd = sc.parallelize([1,2,3]) ->>> def f(x): -... global a -... a += x ->>> rdd.foreach(f) ->>> a.value -13 - ->>> b = sc.accumulator(0) ->>> def g(x): -... b.add(x) ->>> rdd.foreach(g) ->>> b.value -6 - ->>> from pyspark.accumulators import AccumulatorParam ->>> class VectorAccumulatorParam(AccumulatorParam): -... def zero(self, value): -... return [0.0] * len(value) -... def addInPlace(self, val1, val2): -... for i in range(len(val1)): -... val1[i] += val2[i] -... return val1 ->>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam()) ->>> va.value -[1.0, 2.0, 3.0] ->>> def g(x): -... global va -... va += [x] * 3 ->>> rdd.foreach(g) ->>> va.value -[7.0, 8.0, 9.0] - ->>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL -Traceback (most recent call last): - ... -Py4JJavaError:... - ->>> def h(x): -... global a -... a.value = 7 ->>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL -Traceback (most recent call last): - ... -Py4JJavaError:... 
- ->>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL -Traceback (most recent call last): - ... -TypeError:... -""" - import sys import select import struct @@ -96,6 +25,19 @@ import threading from pyspark.cloudpickle import CloudPickler from pyspark.serializers import read_int, PickleSerializer +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest +import doctest +try: + import xmlrunner +except ImportError: + xmlrunner = None __all__ = ['Accumulator', 'AccumulatorParam'] @@ -117,6 +59,76 @@ def _deserialize_accumulator(aid, zero_value, accum_param): class Accumulator(object): + """ + >>> from pyspark.context import SparkContext + >>> sc = SparkContext('local', 'test') + >>> a = sc.accumulator(1) + >>> a.value + 1 + >>> a.value = 2 + >>> a.value + 2 + >>> a += 5 + >>> a.value + 7 + + >>> sc.accumulator(1.0).value + 1.0 + + >>> sc.accumulator(1j).value + 1j + + >>> rdd = sc.parallelize([1,2,3]) + >>> def f(x): + ... global a + ... a += x + >>> rdd.foreach(f) + >>> a.value + 13 + + >>> b = sc.accumulator(0) + >>> def g(x): + ... b.add(x) + >>> rdd.foreach(g) + >>> b.value + 6 + + >>> from pyspark.accumulators import AccumulatorParam + >>> class VectorAccumulatorParam(AccumulatorParam): + ... def zero(self, value): + ... return [0.0] * len(value) + ... def addInPlace(self, val1, val2): + ... for i in range(len(val1)): + ... val1[i] += val2[i] + ... return val1 + >>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam()) + >>> va.value + [1.0, 2.0, 3.0] + >>> def g(x): + ... global va + ... va += [x] * 3 + >>> rdd.foreach(g) + >>> va.value + [7.0, 8.0, 9.0] + + >>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + Py4JJavaError:... + + >>> def h(x): + ... global a + ... a.value = 7 + >>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + Py4JJavaError:... + + >>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError:... 
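+
+    Once the examples are done, the SparkContext they started can be shut
+    down (a cleanup step assumed here, mirroring the `globs['sc'].stop()`
+    calls in the `_test()` runners elsewhere in this patch; any example
+    after this point would need a fresh context):
+
+    >>> sc.stop()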
+ """ """ A shared variable that can be accumulated, i.e., has a commutative and associative "add" @@ -263,7 +275,11 @@ def _start_update_server(): return server if __name__ == "__main__": - import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 663c9abe0881..4f2d337c00f5 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -19,6 +19,18 @@ import sys import gc from tempfile import NamedTemporaryFile +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version < '3': import cPickle as pickle @@ -115,6 +127,11 @@ def __reduce__(self): if __name__ == "__main__": import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py index 924da3eecf21..1ad11befb714 100644 --- a/python/pyspark/conf.py +++ b/python/pyspark/conf.py @@ -15,49 +15,22 @@ # limitations under the License. # -""" ->>> from pyspark.conf import SparkConf ->>> from pyspark.context import SparkContext ->>> conf = SparkConf() ->>> conf.setMaster("local").setAppName("My app") - ->>> conf.get("spark.master") -u'local' ->>> conf.get("spark.app.name") -u'My app' ->>> sc = SparkContext(conf=conf) ->>> sc.master -u'local' ->>> sc.appName -u'My app' ->>> sc.sparkHome is None -True - ->>> conf = SparkConf(loadDefaults=False) ->>> conf.setSparkHome("/path") - ->>> conf.get("spark.home") -u'/path' ->>> conf.setExecutorEnv("VAR1", "value1") - ->>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")]) - ->>> conf.get("spark.executorEnv.VAR1") -u'value1' ->>> print(conf.toDebugString()) -spark.executorEnv.VAR1=value1 -spark.executorEnv.VAR3=value3 -spark.executorEnv.VAR4=value4 -spark.home=/path ->>> sorted(conf.getAll(), key=lambda p: p[0]) -[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \ -(u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')] -""" - __all__ = ['SparkConf'] import sys import re +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version > '3': unicode = str @@ -86,6 +59,45 @@ class SparkConf(object): and can no longer be modified by the user. 
""" + """ + >>> from pyspark.conf import SparkConf + >>> from pyspark.context import SparkContext + >>> conf = SparkConf() + >>> conf.setMaster("local").setAppName("My app") + + >>> conf.get("spark.master") + u'local' + >>> conf.get("spark.app.name") + u'My app' + >>> sc = SparkContext(conf=conf) + >>> sc.master + u'local' + >>> sc.appName + u'My app' + >>> sc.sparkHome is None + True + + >>> conf = SparkConf(loadDefaults=False) + >>> conf.setSparkHome("/path") + + >>> conf.get("spark.home") + u'/path' + >>> conf.setExecutorEnv("VAR1", "value1") + + >>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")]) + + >>> conf.get("spark.executorEnv.VAR1") + u'value1' + >>> print(conf.toDebugString()) + spark.executorEnv.VAR1=value1 + spark.executorEnv.VAR3=value3 + spark.executorEnv.VAR4=value4 + spark.home=/path + >>> sorted(conf.getAll(), key=lambda p: p[0]) + [(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \ + (u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')] + """ + def __init__(self, loadDefaults=True, _jvm=None, _jconf=None): """ Create a new Spark configuration. @@ -182,8 +194,13 @@ def toDebugString(self): def _test(): import doctest - (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS) - if failure_count: + t = doctest.DocTestSuite(optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 529d16b48039..bae44c22b714 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -24,6 +24,18 @@ import threading from threading import RLock from tempfile import NamedTemporaryFile +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import accumulators from pyspark.accumulators import Accumulator @@ -957,9 +969,14 @@ def _test(): globs['sc'] = SparkContext('local[4]', 'PythonTest') globs['tempdir'] = tempfile.mkdtemp() atexit.register(lambda: shutil.rmtree(globs['tempdir'])) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 16ad76483de6..47618af30ce4 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -16,6 +16,19 @@ # import warnings +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since from pyspark.ml.util import * @@ -880,6 +893,7 @@ def weights(self): import pyspark.ml.classification from pyspark.context import SparkContext from pyspark.sql import SQLContext + 
import tempfile globs = pyspark.ml.classification.__dict__.copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -887,11 +901,15 @@ def weights(self): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - import tempfile temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() finally: from shutil import rmtree @@ -899,5 +917,5 @@ def weights(self): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 1cea477acb47..fef2c4bc13b2 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -15,6 +15,20 @@ # limitations under the License. # +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest + from pyspark import since from pyspark.ml.util import * from pyspark.ml.wrapper import JavaEstimator, JavaModel @@ -297,6 +311,7 @@ def _create_model(self, java_model): import pyspark.ml.clustering from pyspark.context import SparkContext from pyspark.sql import SQLContext + import tempfile globs = pyspark.ml.clustering.__dict__.copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -304,11 +319,15 @@ def _create_model(self, java_model): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - import tempfile temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() finally: from shutil import rmtree @@ -316,5 +335,5 @@ def _create_model(self, java_model): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index c9b95b3bf45d..00f8fd3e1252 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -16,6 +16,19 @@ # from abc import abstractmethod, ABCMeta +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since from pyspark.ml.wrapper import JavaWrapper @@ -315,8 +328,12 @@ def setParams(self, predictionCol="prediction", labelCol="label", sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, 
optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 5025493c42c3..ad93708c2443 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,6 +16,18 @@ # import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version > '3': basestring = str @@ -2580,7 +2592,12 @@ def selectedFeatures(self): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() finally: from shutil import rmtree @@ -2588,5 +2605,5 @@ def selectedFeatures(self): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 2b605e5c5078..903addefbd42 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -15,6 +15,20 @@ # limitations under the License. # +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest + from pyspark import since from pyspark.ml.util import * from pyspark.ml.wrapper import JavaEstimator, JavaModel @@ -326,6 +340,7 @@ def itemFactors(self): import pyspark.ml.recommendation from pyspark.context import SparkContext from pyspark.sql import SQLContext + import tempfile globs = pyspark.ml.recommendation.__dict__.copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -333,11 +348,15 @@ def itemFactors(self): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - import tempfile temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() finally: from shutil import rmtree @@ -345,5 +364,5 @@ def itemFactors(self): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 6e23393f9102..844074806486 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -16,6 +16,19 @@ # import warnings +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + 
except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since from pyspark.ml.param.shared import * @@ -906,6 +919,7 @@ def predict(self, features): import pyspark.ml.regression from pyspark.context import SparkContext from pyspark.sql import SQLContext + import tempfile globs = pyspark.ml.regression.__dict__.copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -913,11 +927,15 @@ def predict(self, features): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - import tempfile temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() finally: from shutil import rmtree @@ -925,5 +943,5 @@ def predict(self, features): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 77af0094dfca..76e26e08c716 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -17,6 +17,19 @@ import itertools import numpy as np +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since from pyspark.ml import Estimator, Model @@ -490,8 +503,12 @@ def copy(self, extra=None): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) sc.stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 57106f8690a7..9660d68bcb1f 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -16,9 +16,22 @@ # from math import exp +import sys import numpy from numpy import array +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import RDD, since from pyspark.streaming import DStream @@ -753,9 +766,14 @@ def _test(): import pyspark.mllib.classification globs = pyspark.mllib.classification.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = 
unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 23d118bd4090..e33681631027 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -18,6 +18,18 @@ import sys import array as pyarray import warnings +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version > '3': xrange = range @@ -1063,9 +1075,14 @@ def _test(): import pyspark.mllib.clustering globs = pyspark.mllib.clustering.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 22e68ea5b451..79437b4527cc 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,6 +15,20 @@ # limitations under the License. # +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest + from pyspark import since from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc from pyspark.sql import SQLContext @@ -520,9 +534,14 @@ def _test(): import pyspark.mllib.evaluation globs = pyspark.mllib.evaluation.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest') - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 612935352575..e098bb82e73f 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -24,6 +24,18 @@ import warnings import random import binascii +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = str unicode = str @@ -715,9 +727,14 @@ def _test(): from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = 
xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index f339e5089116..b84c5927f05f 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -18,6 +18,19 @@ import numpy from numpy import array from collections import namedtuple +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import SparkContext, since from pyspark.rdd import ignore_unicode_prefix @@ -184,14 +197,19 @@ class FreqSequence(namedtuple("FreqSequence", ["sequence", "freq"])): def _test(): import doctest import pyspark.mllib.fpm + import tempfile globs = pyspark.mllib.fpm.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest') - import tempfile - temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path + try: - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() finally: from shutil import rmtree @@ -199,7 +217,7 @@ def _test(): rmtree(temp_path) except OSError: pass - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index abf00a473794..410076e7ed7d 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -26,6 +26,18 @@ import sys import array import struct +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = str @@ -1237,8 +1249,13 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values): def _test(): import doctest - (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS) - if failure_count: + t = doctest.DocTestSuite(optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 43cb0beef1bd..fd720f17cf33 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -20,6 +20,18 @@ """ import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': long = int @@ -926,9 +938,14 @@ def _test(): globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) 
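     # batchSize=2 keeps serialized batches small, so the doctests exercise
     # multiple batches even with the tiny example data used here (the same
     # rationale noted in the _test() functions of mllib/random.py and
     # mllib/util.py in this patch).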
globs['sqlContext'] = SQLContext(globs['sc']) globs['Matrices'] = Matrices - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py index 6a3c643b6641..e77efd7bdc8a 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/random.py @@ -20,6 +20,19 @@ """ from functools import wraps +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since from pyspark.mllib.common import callMLlibFunc @@ -414,9 +427,14 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 7e60255d43ea..60e05e74d833 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -16,7 +16,20 @@ # import array +import sys from collections import namedtuple +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import SparkContext, since from pyspark.rdd import RDD @@ -323,9 +336,14 @@ def _test(): sc = SparkContext('local[4]', 'PythonTest') globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 3b77a6200054..e9f4647d3cfb 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -17,6 +17,19 @@ import numpy as np from numpy import array +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import RDD, since from pyspark.streaming.dstream 
import DStream @@ -810,9 +823,14 @@ def _test(): import pyspark.mllib.regression globs = pyspark.mllib.regression.__dict__.copy() globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 7da921976d4d..85adad0f034d 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -21,6 +21,18 @@ xrange = range import numpy as np +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark.mllib.common import callMLlibFunc from pyspark.rdd import RDD @@ -37,7 +49,7 @@ class KernelDensity(object): >>> sample = sc.parallelize([0.0, 1.0]) >>> kd.setSample(sample) >>> kd.estimate([0.0, 1.0]) - array([ 0.12938758, 0.12938758]) + array([ 0.3204565, 0.3204565]) """ def __init__(self): self._bandwidth = 1.0 @@ -59,3 +71,23 @@ def estimate(self, points): densities = callMLlibFunc( "estimateKernelDensity", self._sample, self._bandwidth, points) return np.asarray(densities) + + +def _test(): + import doctest + from pyspark import SparkContext + globs = globals().copy() + globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + globs['sc'].stop() + if not result.wasSuccessful(): + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 36c8f48a4a88..376e307f6b6b 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -18,6 +18,18 @@ import sys if sys.version >= '3': basestring = str +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark.rdd import RDD, ignore_unicode_prefix from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper @@ -309,9 +321,14 @@ def _test(): from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/stat/distribution.py 
b/python/pyspark/mllib/stat/distribution.py index 46f7a1d2f277..f63c02452151 100644 --- a/python/pyspark/mllib/stat/distribution.py +++ b/python/pyspark/mllib/stat/distribution.py @@ -16,6 +16,19 @@ # from collections import namedtuple +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest __all__ = ['MultivariateGaussian'] @@ -30,3 +43,23 @@ class MultivariateGaussian(namedtuple('MultivariateGaussian', ['mu', 'sigma'])): >>> (m[0], m[1]) (DenseVector([11.0, 12.0]), array([[ 1., 5.],[ 3., 2.]])) """ + + +def _test(): + import doctest + from pyspark import SparkContext + globs = globals().copy() + globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + globs['sc'].stop() + if not result.wasSuccessful(): + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index f7ea466b4329..7777fc506e5b 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -18,6 +18,19 @@ from __future__ import absolute_import import random +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import SparkContext, RDD, since from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper @@ -658,9 +671,14 @@ def _test(): import doctest globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) if __name__ == "__main__": diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index 39bc6586dd58..bbdd6226e807 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -18,6 +18,18 @@ import sys import numpy as np import warnings +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version > '3': xrange = range @@ -352,9 +364,14 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = 
unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py index 44d17bd62947..349b73c4b327 100644 --- a/python/pyspark/profiler.py +++ b/python/pyspark/profiler.py @@ -19,6 +19,19 @@ import pstats import os import atexit +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark.accumulators import AccumulatorParam @@ -171,6 +184,11 @@ def stats(self): if __name__ == "__main__": import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 37574cea0b68..05f3b96eea97 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -33,6 +33,18 @@ from itertools import chain from functools import reduce from math import sqrt, log, isinf, isnan, pow, ceil +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version > '3': basestring = unicode = str @@ -2426,10 +2438,14 @@ def _test(): # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[4]', 'PythonTest') - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 2a1326947f4f..e338bcb5308f 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -15,40 +15,6 @@ # limitations under the License. # -""" -PySpark supports custom serializers for transferring data; this can improve -performance. - -By default, PySpark uses L{PickleSerializer} to serialize objects using Python's -C{cPickle} serializer, which can serialize nearly any Python object. -Other serializers, like L{MarshalSerializer}, support fewer datatypes but can be -faster. 
- -The serializer is chosen when creating L{SparkContext}: - ->>> from pyspark.context import SparkContext ->>> from pyspark.serializers import MarshalSerializer ->>> sc = SparkContext('local', 'test', serializer=MarshalSerializer()) ->>> sc.parallelize(list(range(1000))).map(lambda x: 2 * x).take(10) -[0, 2, 4, 6, 8, 10, 12, 14, 16, 18] ->>> sc.stop() - -PySpark serialize objects in batches; By default, the batch size is chosen based -on the size of objects, also configurable by SparkContext's C{batchSize} parameter: - ->>> sc = SparkContext('local', 'test', batchSize=2) ->>> rdd = sc.parallelize(range(16), 4).map(lambda x: x) - -Behind the scenes, this creates a JavaRDD with four partitions, each of -which contains two batches of two objects: - ->>> rdd.glom().collect() -[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]] ->>> int(rdd._jrdd.count()) -8 ->>> sc.stop() -""" - import sys from itertools import chain, product import marshal @@ -57,6 +23,18 @@ import collections import zlib import itertools +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version < '3': import cPickle as pickle @@ -438,6 +416,40 @@ class MarshalSerializer(FramedSerializer): This serializer is faster than PickleSerializer but supports fewer datatypes. """ + """ + PySpark supports custom serializers for transferring data; this can improve + performance. + + By default, PySpark uses L{PickleSerializer} to serialize objects using Python's + C{cPickle} serializer, which can serialize nearly any Python object. + Other serializers, like L{MarshalSerializer}, support fewer datatypes but can be + faster. 
+ + The serializer is chosen when creating L{SparkContext}: + + >>> from pyspark.context import SparkContext + >>> from pyspark.serializers import MarshalSerializer + >>> sc = SparkContext('local', 'test', serializer=MarshalSerializer()) + >>> sc.parallelize(list(range(1000))).map(lambda x: 2 * x).take(10) + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + >>> sc.stop() + + PySpark serialize objects in batches; By default, the batch size is chosen based + on the size of objects, also configurable by SparkContext's C{batchSize} parameter: + + >>> sc = SparkContext('local', 'test', batchSize=2) + >>> rdd = sc.parallelize(range(16), 4).map(lambda x: x) + + Behind the scenes, this creates a JavaRDD with four partitions, each of + which contains two batches of two objects: + + >>> rdd.glom().collect() + [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]] + >>> int(rdd._jrdd.count()) + 8 + >>> sc.stop() + """ + def dumps(self, obj): return marshal.dumps(obj) @@ -557,6 +569,11 @@ def write_with_length(obj, stream): if __name__ == '__main__': import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index e974cda9fc3e..313e3be5ec02 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -23,6 +23,19 @@ import itertools import operator import random +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest import pyspark.heapq3 as heapq from pyspark.serializers import BatchedSerializer, PickleSerializer, FlattenedValuesSerializer, \ @@ -808,6 +821,11 @@ def load_partition(j): if __name__ == "__main__": import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 19ec6fcc5d6d..98fe7d50ed4b 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -17,6 +17,18 @@ import sys import warnings +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = str @@ -447,11 +459,16 @@ def _test(): .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) - (failure_count, test_count) = doctest.testmod( - pyspark.sql.column, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + t = doctest.DocTestSuite(pyspark.sql.column, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = 
unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 9c2f6a3c5660..073272a77459 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -20,6 +20,18 @@ import warnings import json from functools import reduce +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = unicode = str @@ -670,11 +682,15 @@ def _test(): ] globs['jsonStrings'] = jsonStrings globs['json'] = sc.parallelize(jsonStrings) - (failure_count, test_count) = doctest.testmod( - pyspark.sql.context, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) + t = doctest.DocTestSuite(pyspark.sql.context, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 7e1854c43be3..8261a1ab869b 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -18,6 +18,18 @@ import sys import warnings import random +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = unicode = str @@ -1467,11 +1479,16 @@ def _test(): Row(name='Tom', age=None, height=None), Row(name=None, age=None, height=None)]).toDF() - (failure_count, test_count) = doctest.testmod( - pyspark.sql.dataframe, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + t = doctest.DocTestSuite(pyspark.sql.dataframe, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index dee3d536be43..31ed8b4ead09 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -20,6 +20,18 @@ """ import math import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version < "3": from itertools import imap as map @@ -1693,11 +1705,15 @@ def _test(): globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) globs['df'] = sc.parallelize([Row(name='Alice', age=2), Row(name='Bob', age=5)]).toDF() - (failure_count, test_count) = doctest.testmod( - pyspark.sql.functions, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) + t = 
doctest.DocTestSuite(pyspark.sql.functions, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index ee734cb43928..d408b67f6fe0 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -15,6 +15,20 @@ # limitations under the License. # +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest + from pyspark import since from pyspark.rdd import ignore_unicode_prefix from pyspark.sql.column import Column, _to_seq, _to_java_column, _create_column_from_literal @@ -213,11 +227,16 @@ def _test(): Row(course="dotNET", year=2013, earnings=48000), Row(course="Java", year=2013, earnings=30000)]).toDF() - (failure_count, test_count) = doctest.testmod( - pyspark.sql.group, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + t = doctest.DocTestSuite(pyspark.sql.group, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 438662bb157f..80c169ba7d78 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -16,6 +16,18 @@ # import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest if sys.version >= '3': basestring = unicode = str @@ -615,11 +627,16 @@ def _test(): globs['hiveContext'] = HiveContext(sc) globs['df'] = globs['sqlContext'].read.parquet('python/test_support/sql/parquet_partitioned') - (failure_count, test_count) = doctest.testmod( - pyspark.sql.readwriter, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + t = doctest.DocTestSuite(pyspark.sql.readwriter, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 734c1533a24b..0e130c05fa3a 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -24,6 +24,18 @@ import re import base64 from array import array +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or 
earlier') + sys.exit(1) +else: + import unittest if sys.version >= "3": long = int @@ -1483,9 +1495,14 @@ def _test(): sc = SparkContext('local[4]', 'PythonTest') globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) globs['sc'].stop() - if failure_count: + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 57bbe340bbd4..891cd5019af9 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -16,6 +16,18 @@ # import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import since, SparkContext from pyspark.sql.column import _to_seq, _to_java_column @@ -146,9 +158,15 @@ def rangeBetween(self, start, end): def _test(): import doctest - SparkContext('local[4]', 'PythonTest') - (failure_count, test_count) = doctest.testmod() - if failure_count: + sc = SparkContext('local[4]', 'PythonTest') + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + sc.stop() + if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py index 03ea0b6d33c9..3a5ab0b21176 100644 --- a/python/pyspark/statcounter.py +++ b/python/pyspark/statcounter.py @@ -19,6 +19,19 @@ import copy import math +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest try: from numpy import maximum, minimum, sqrt @@ -156,3 +169,15 @@ def asDict(self, sample=False): def __repr__(self): return ("(count: %s, mean: %s, stdev: %s, max: %s, min: %s)" % (self.count(), self.mean(), self.stdev(), self.max(), self.min())) + + +if __name__ == "__main__": + import doctest + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): + exit(-1) diff --git a/python/pyspark/streaming/util.py b/python/pyspark/streaming/util.py index abbbf6eb9394..01643a008c98 100644 --- a/python/pyspark/streaming/util.py +++ b/python/pyspark/streaming/util.py @@ -18,6 +18,19 @@ import time from datetime import datetime import traceback +import sys +try: + import xmlrunner +except ImportError: + xmlrunner = None +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest from pyspark import SparkContext, RDD @@ -145,6 +158,11 @@ def rddToFileName(prefix, suffix, timestamp): if __name__ == "__main__": import doctest - (failure_count, test_count) = doctest.testmod() - if 
failure_count: + t = doctest.DocTestSuite() + if xmlrunner: + result = xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + result = unittest.TextTestRunner(verbosity=3).run(t) + if not result.wasSuccessful(): exit(-1) From bd5619ed068bb08f33c4cc4ae55ba2c9a8e06bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Sun, 24 Jan 2016 21:49:25 -0500 Subject: [PATCH 2/4] SPARK-11741 Centralize doctest processing --- python/pyspark/accumulators.py | 21 ++--------- python/pyspark/broadcast.py | 21 ++--------- python/pyspark/conf.py | 20 ++--------- python/pyspark/context.py | 21 ++--------- python/pyspark/doctesthelper.py | 41 ++++++++++++++++++++++ python/pyspark/ml/classification.py | 21 ++--------- python/pyspark/ml/clustering.py | 22 ++---------- python/pyspark/ml/evaluation.py | 22 ++---------- python/pyspark/ml/feature.py | 21 ++--------- python/pyspark/ml/recommendation.py | 22 ++---------- python/pyspark/ml/regression.py | 22 ++---------- python/pyspark/ml/tuning.py | 22 ++---------- python/pyspark/mllib/classification.py | 22 ++---------- python/pyspark/mllib/clustering.py | 21 ++--------- python/pyspark/mllib/evaluation.py | 22 ++---------- python/pyspark/mllib/feature.py | 21 ++--------- python/pyspark/mllib/fpm.py | 22 ++---------- python/pyspark/mllib/linalg/__init__.py | 21 ++--------- python/pyspark/mllib/linalg/distributed.py | 21 ++--------- python/pyspark/mllib/random.py | 22 ++---------- python/pyspark/mllib/recommendation.py | 22 ++---------- python/pyspark/mllib/regression.py | 22 ++---------- python/pyspark/mllib/stat/KernelDensity.py | 21 ++--------- python/pyspark/mllib/stat/_statistics.py | 21 ++--------- python/pyspark/mllib/stat/distribution.py | 22 ++---------- python/pyspark/mllib/tree.py | 22 ++---------- python/pyspark/mllib/util.py | 21 ++--------- python/pyspark/profiler.py | 22 ++---------- python/pyspark/rdd.py | 21 ++--------- python/pyspark/serializers.py | 21 ++--------- python/pyspark/shuffle.py | 22 ++---------- python/pyspark/sql/column.py | 24 +++---------- python/pyspark/sql/context.py | 22 ++---------- python/pyspark/sql/dataframe.py | 24 +++---------- python/pyspark/sql/functions.py | 22 ++---------- python/pyspark/sql/group.py | 25 +++---------- python/pyspark/sql/readwriter.py | 24 +++---------- python/pyspark/sql/types.py | 21 ++--------- python/pyspark/sql/window.py | 22 ++---------- python/pyspark/statcounter.py | 22 ++---------- python/pyspark/streaming/util.py | 22 ++---------- 41 files changed, 155 insertions(+), 758 deletions(-) create mode 100644 python/pyspark/doctesthelper.py diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 884295a2d350..ea293287a831 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -25,19 +25,6 @@ import threading from pyspark.cloudpickle import CloudPickler from pyspark.serializers import read_int, PickleSerializer -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest -import doctest -try: - import xmlrunner -except ImportError: - xmlrunner = None __all__ = ['Accumulator', 'AccumulatorParam'] @@ -275,11 +262,7 @@ def _start_update_server(): return server if __name__ == "__main__": - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = 
unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py index 4f2d337c00f5..ab30a3509e4d 100644 --- a/python/pyspark/broadcast.py +++ b/python/pyspark/broadcast.py @@ -19,18 +19,6 @@ import sys import gc from tempfile import NamedTemporaryFile -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version < '3': import cPickle as pickle @@ -126,12 +114,7 @@ def __reduce__(self): if __name__ == "__main__": - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py index 1ad11befb714..067e00b4bdb6 100644 --- a/python/pyspark/conf.py +++ b/python/pyspark/conf.py @@ -19,18 +19,6 @@ import sys import re -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version > '3': unicode = str @@ -194,12 +182,8 @@ def toDebugString(self): def _test(): import doctest - t = doctest.DocTestSuite(optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__, optionflags=doctest.ELLIPSIS) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index bae44c22b714..4a378c9e85b9 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -24,18 +24,6 @@ import threading from threading import RLock from tempfile import NamedTemporaryFile -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import accumulators from pyspark.accumulators import Accumulator @@ -963,18 +951,15 @@ def dump_profiles(self, path): def _test(): import atexit + from pyspark.doctesthelper import run_doctests import doctest import tempfile globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest') globs['tempdir'] = tempfile.mkdtemp() atexit.register(lambda: shutil.rmtree(globs['tempdir'])) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/doctesthelper.py b/python/pyspark/doctesthelper.py new 
file mode 100644 index 000000000000..d104d84c5531 --- /dev/null +++ b/python/pyspark/doctesthelper.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +if sys.version_info[:2] <= (2, 6): + try: + import unittest2 as unittest + except ImportError: + sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') + sys.exit(1) +else: + import unittest +import doctest +try: + import xmlrunner +except ImportError: + xmlrunner = None + + +def run_doctests(file_name, globs={}, optionflags=None): + t = doctest.DocFileSuite(file_name, module_relative=False, + globs=globs, optionflags=optionflags) + if xmlrunner: + return xmlrunner.XMLTestRunner(output='target/test-reports', + verbosity=3).run(t) + else: + return unittest.TextTestRunner(verbosity=3).run(t) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 47618af30ce4..94228b2abd0e 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -17,18 +17,6 @@ import warnings import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml.util import * @@ -891,6 +879,7 @@ def weights(self): if __name__ == "__main__": import doctest import pyspark.ml.classification + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext import tempfile @@ -904,12 +893,8 @@ def weights(self): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() finally: from shutil import rmtree diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index fef2c4bc13b2..5ca8c8461ce6 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -15,19 +15,6 @@ # limitations under the License. 
# -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml.util import * @@ -309,6 +296,7 @@ def _create_model(self, java_model): if __name__ == "__main__": import doctest import pyspark.ml.clustering + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext import tempfile @@ -322,12 +310,8 @@ def _create_model(self, java_model): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() finally: from shutil import rmtree diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 00f8fd3e1252..bcbbbf12ac51 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -16,19 +16,6 @@ # from abc import abstractmethod, ABCMeta -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml.wrapper import JavaWrapper @@ -319,6 +306,7 @@ def setParams(self, predictionCol="prediction", labelCol="label", if __name__ == "__main__": import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext globs = globals().copy() @@ -328,12 +316,8 @@ def setParams(self, predictionCol="prediction", labelCol="label", sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index ad93708c2443..ccc3eedb1e0f 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,18 +16,6 @@ # import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version > '3': basestring = str @@ -2572,6 +2560,7 @@ def selectedFeatures(self): import tempfile import pyspark.ml.feature + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext @@ -2592,12 +2581,8 @@ def selectedFeatures(self): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = 
xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() finally: from shutil import rmtree diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 903addefbd42..6296c1283007 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -15,19 +15,6 @@ # limitations under the License. # -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml.util import * @@ -338,6 +325,7 @@ def itemFactors(self): if __name__ == "__main__": import doctest import pyspark.ml.recommendation + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext import tempfile @@ -351,12 +339,8 @@ def itemFactors(self): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() finally: from shutil import rmtree diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 844074806486..d01e18120f91 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -16,19 +16,6 @@ # import warnings -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml.param.shared import * @@ -917,6 +904,7 @@ def predict(self, features): if __name__ == "__main__": import doctest import pyspark.ml.regression + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext import tempfile @@ -930,12 +918,8 @@ def predict(self, features): temp_path = tempfile.mkdtemp() globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() finally: from shutil import rmtree diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 76e26e08c716..d3aa1ac52258 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -17,19 +17,6 @@ import itertools import numpy as np -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.ml import Estimator, Model @@ -494,6 +481,7 @@ def 
copy(self, extra=None): if __name__ == "__main__": import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext globs = globals().copy() @@ -503,12 +491,8 @@ def copy(self, extra=None): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) sc.stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 9660d68bcb1f..2f6431f9f070 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -16,22 +16,9 @@ # from math import exp -import sys import numpy from numpy import array -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import RDD, since from pyspark.streaming import DStream @@ -762,16 +749,13 @@ def update(rdd): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext import pyspark.mllib.classification globs = pyspark.mllib.classification.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index e33681631027..c70ad517c57b 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -18,18 +18,6 @@ import sys import array as pyarray import warnings -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version > '3': xrange = range @@ -1072,15 +1060,12 @@ def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0, def _test(): import doctest + from pyspark.doctesthelper import run_doctests import pyspark.mllib.clustering globs = pyspark.mllib.clustering.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 79437b4527cc..6a5be9d23da1 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,19 
+15,6 @@ # limitations under the License. # -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc @@ -530,16 +517,13 @@ def accuracy(self): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext import pyspark.mllib.evaluation globs = pyspark.mllib.evaluation.__dict__.copy() globs['sc'] = SparkContext('local[4]', 'PythonTest') - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index e098bb82e73f..42fd1ec5d5f0 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -24,18 +24,6 @@ import warnings import random import binascii -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = str unicode = str @@ -724,15 +712,12 @@ def transform(self, vector): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index b84c5927f05f..58e1b69b825c 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -18,19 +18,6 @@ import numpy from numpy import array from collections import namedtuple -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import SparkContext, since from pyspark.rdd import ignore_unicode_prefix @@ -196,6 +183,7 @@ class FreqSequence(namedtuple("FreqSequence", ["sequence", "freq"])): def _test(): import doctest + from pyspark.doctesthelper import run_doctests import pyspark.mllib.fpm import tempfile globs = pyspark.mllib.fpm.__dict__.copy() @@ -204,12 +192,8 @@ def _test(): globs['temp_path'] = temp_path try: - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, 
globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() finally: from shutil import rmtree diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 410076e7ed7d..e88910aac430 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -26,18 +26,6 @@ import sys import array import struct -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = str @@ -1248,13 +1236,8 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values): def _test(): - import doctest - t = doctest.DocTestSuite(optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__, optionflags=doctest.ELLIPSIS) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index fd720f17cf33..abbb32d486c0 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -20,18 +20,6 @@ """ import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': long = int @@ -930,6 +918,7 @@ def toCoordinateMatrix(self): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext from pyspark.sql import SQLContext from pyspark.mllib.linalg import Matrices @@ -938,12 +927,8 @@ def _test(): globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) globs['sqlContext'] = SQLContext(globs['sc']) globs['Matrices'] = Matrices - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py index e77efd7bdc8a..efaca9e6e387 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/random.py @@ -20,19 +20,6 @@ """ from functools import wraps -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.mllib.common import callMLlibFunc @@ -422,17 +409,14 @@ def gammaVectorRDD(sc, shape, scale, numRows, numCols, numPartitions=None, seed= def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = 
SparkContext('local[2]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 60e05e74d833..5ca1b5d2978b 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -16,20 +16,7 @@ # import array -import sys from collections import namedtuple -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import SparkContext, since from pyspark.rdd import RDD @@ -330,18 +317,15 @@ def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alp def _test(): import doctest + from pyspark.doctesthelper import run_doctests import pyspark.mllib.recommendation from pyspark.sql import SQLContext globs = pyspark.mllib.recommendation.__dict__.copy() sc = SparkContext('local[4]', 'PythonTest') globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index e9f4647d3cfb..d7f456336819 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -17,19 +17,6 @@ import numpy as np from numpy import array -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import RDD, since from pyspark.streaming.dstream import DStream @@ -819,16 +806,13 @@ def update(rdd): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext import pyspark.mllib.regression globs = pyspark.mllib.regression.__dict__.copy() globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 85adad0f034d..4564c5c9ed74 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -21,18 +21,6 @@ xrange = range import numpy as np -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 
6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark.mllib.common import callMLlibFunc from pyspark.rdd import RDD @@ -75,15 +63,12 @@ def estimate(self, points): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 376e307f6b6b..69ff179dae24 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -18,18 +18,6 @@ import sys if sys.version >= '3': basestring = str -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark.rdd import RDD, ignore_unicode_prefix from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper @@ -318,15 +306,12 @@ def kolmogorovSmirnovTest(data, distName="norm", *params): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/stat/distribution.py b/python/pyspark/mllib/stat/distribution.py index f63c02452151..d642badb22b2 100644 --- a/python/pyspark/mllib/stat/distribution.py +++ b/python/pyspark/mllib/stat/distribution.py @@ -16,19 +16,6 @@ # from collections import namedtuple -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest __all__ = ['MultivariateGaussian'] @@ -47,15 +34,12 @@ class MultivariateGaussian(namedtuple('MultivariateGaussian', ['mu', 'sigma'])): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark import SparkContext globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git 
a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 7777fc506e5b..ce47c22afdb1 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -18,19 +18,6 @@ from __future__ import absolute_import import random -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import SparkContext, RDD, since from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper @@ -669,14 +656,11 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, def _test(): import doctest + from pyspark.doctesthelper import run_doctests globs = globals().copy() globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index bbdd6226e807..cc26421d8a10 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -18,18 +18,6 @@ import sys import numpy as np import warnings -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version > '3': xrange = range @@ -359,17 +347,14 @@ def generateLinearRDD(sc, nexamples, nfeatures, eps, def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py index 349b73c4b327..7ad929715089 100644 --- a/python/pyspark/profiler.py +++ b/python/pyspark/profiler.py @@ -19,19 +19,6 @@ import pstats import os import atexit -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark.accumulators import AccumulatorParam @@ -183,12 +170,7 @@ def stats(self): if __name__ == "__main__": - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not 
result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 05f3b96eea97..3a20a7b31078 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -33,18 +33,6 @@ from itertools import chain from functools import reduce from math import sqrt, log, isinf, isnan, pow, ceil -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version > '3': basestring = unicode = str @@ -2432,18 +2420,15 @@ def _is_pipelinable(self): def _test(): + from pyspark.doctesthelper import run_doctests import doctest from pyspark.context import SparkContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: globs['sc'] = SparkContext('local[4]', 'PythonTest') - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index e338bcb5308f..e79faf48f0ce 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -23,18 +23,6 @@ import collections import zlib import itertools -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version < '3': import cPickle as pickle @@ -568,12 +556,7 @@ def write_with_length(obj, stream): if __name__ == '__main__': - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 313e3be5ec02..93c33f759e08 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -23,19 +23,6 @@ import itertools import operator import random -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest import pyspark.heapq3 as heapq from pyspark.serializers import BatchedSerializer, PickleSerializer, FlattenedValuesSerializer, \ @@ -820,12 +807,7 @@ def load_partition(j): if __name__ == "__main__": - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 98fe7d50ed4b..92dbe0ea6913 100644 
--- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -17,18 +17,6 @@ import sys import warnings -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = str @@ -448,6 +436,7 @@ def __repr__(self): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext import pyspark.sql.column @@ -459,14 +448,9 @@ def _test(): .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) - t = doctest.DocTestSuite(pyspark.sql.column, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 073272a77459..3a4864eb9bc5 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -20,18 +20,6 @@ import warnings import json from functools import reduce -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = unicode = str @@ -657,6 +645,7 @@ def register(self, name, f, returnType=StringType()): def _test(): import os import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext import pyspark.sql.context @@ -682,13 +671,8 @@ def _test(): ] globs['jsonStrings'] = jsonStrings globs['json'] = sc.parallelize(jsonStrings) - t = doctest.DocTestSuite(pyspark.sql.context, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8261a1ab869b..32580dde5638 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -18,18 +18,6 @@ import sys import warnings import random -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = unicode = str @@ -1461,6 +1449,7 @@ def sampleBy(self, col, fractions, seed=None): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext import 
pyspark.sql.dataframe @@ -1479,14 +1468,9 @@ def _test(): Row(name='Tom', age=None, height=None), Row(name=None, age=None, height=None)]).toDF() - t = doctest.DocTestSuite(pyspark.sql.dataframe, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 31ed8b4ead09..d3b08ea8df87 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -20,18 +20,6 @@ """ import math import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version < "3": from itertools import imap as map @@ -1697,6 +1685,7 @@ def udf(f, returnType=StringType()): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext import pyspark.sql.functions @@ -1705,13 +1694,8 @@ def _test(): globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) globs['df'] = sc.parallelize([Row(name='Alice', age=2), Row(name='Bob', age=5)]).toDF() - t = doctest.DocTestSuite(pyspark.sql.functions, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index d408b67f6fe0..ad6a1fce4375 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -15,19 +15,6 @@ # limitations under the License. 
# -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since from pyspark.rdd import ignore_unicode_prefix @@ -209,6 +196,7 @@ def pivot(self, pivot_col, values=None): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext import pyspark.sql.group @@ -227,14 +215,9 @@ def _test(): Row(course="dotNET", year=2013, earnings=48000), Row(course="Java", year=2013, earnings=30000)]).toDF() - t = doctest.DocTestSuite(pyspark.sql.group, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 80c169ba7d78..213cb24afa33 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -16,18 +16,6 @@ # import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= '3': basestring = unicode = str @@ -611,6 +599,7 @@ def _test(): import doctest import os import tempfile + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext, HiveContext import pyspark.sql.readwriter @@ -627,14 +616,9 @@ def _test(): globs['hiveContext'] = HiveContext(sc) globs['df'] = globs['sqlContext'].read.parquet('python/test_support/sql/parquet_partitioned') - t = doctest.DocTestSuite(pyspark.sql.readwriter, globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 0e130c05fa3a..0701323b1c6a 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -24,18 +24,6 @@ import re import base64 from array import array -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest if sys.version >= "3": long = int @@ -1489,18 +1477,15 @@ def convert(self, obj, gateway_client): def _test(): import doctest + from pyspark.doctesthelper import run_doctests from pyspark.context import SparkContext from pyspark.sql import SQLContext globs = globals() sc = 
SparkContext('local[4]', 'PythonTest') globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) - t = doctest.DocTestSuite(globs=globs, optionflags=doctest.ELLIPSIS) - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__, globs=globs, + optionflags=doctest.ELLIPSIS) globs['sc'].stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 891cd5019af9..5e103a57e019 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -15,19 +15,6 @@ # limitations under the License. # -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import since, SparkContext from pyspark.sql.column import _to_seq, _to_java_column @@ -157,14 +144,9 @@ def rangeBetween(self, start, end): def _test(): - import doctest + from pyspark.doctesthelper import run_doctests sc = SparkContext('local[4]', 'PythonTest') - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + result = run_doctests(__file__) sc.stop() if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py index 3a5ab0b21176..770a96cc85e0 100644 --- a/python/pyspark/statcounter.py +++ b/python/pyspark/statcounter.py @@ -19,19 +19,6 @@ import copy import math -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest try: from numpy import maximum, minimum, sqrt @@ -172,12 +159,7 @@ def __repr__(self): if __name__ == "__main__": - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): exit(-1) diff --git a/python/pyspark/streaming/util.py b/python/pyspark/streaming/util.py index 01643a008c98..18ead72ee720 100644 --- a/python/pyspark/streaming/util.py +++ b/python/pyspark/streaming/util.py @@ -18,19 +18,6 @@ import time from datetime import datetime import traceback -import sys -try: - import xmlrunner -except ImportError: - xmlrunner = None -if sys.version_info[:2] <= (2, 6): - try: - import unittest2 as unittest - except ImportError: - sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier') - sys.exit(1) -else: - import unittest from pyspark import SparkContext, RDD @@ -157,12 +144,7 @@ def rddToFileName(prefix, suffix, timestamp): if __name__ == "__main__": - import doctest - t = doctest.DocTestSuite() - if xmlrunner: - result = xmlrunner.XMLTestRunner(output='target/test-reports', - verbosity=3).run(t) - else: - result = unittest.TextTestRunner(verbosity=3).run(t) + from pyspark.doctesthelper import run_doctests + result = run_doctests(__file__) if not result.wasSuccessful(): 
exit(-1) From 6e9933bc46be520cd0f5339f7d62014bb7a0d6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Thu, 11 Feb 2016 18:52:16 -0500 Subject: [PATCH 3/4] Correct optionflags default value --- python/pyspark/doctesthelper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/doctesthelper.py b/python/pyspark/doctesthelper.py index d104d84c5531..8947d617da43 100644 --- a/python/pyspark/doctesthelper.py +++ b/python/pyspark/doctesthelper.py @@ -31,7 +31,7 @@ xmlrunner = None -def run_doctests(file_name, globs={}, optionflags=None): +def run_doctests(file_name, globs={}, optionflags=0): t = doctest.DocFileSuite(file_name, module_relative=False, globs=globs, optionflags=optionflags) if xmlrunner: From 4e34b246985bec0a388aad1f877ba6fab52c95f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Thu, 11 Feb 2016 20:00:52 -0500 Subject: [PATCH 4/4] Clean extra whitespace --- python/pyspark/rdd.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 3a20a7b31078..bb5185a95032 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2004,20 +2004,20 @@ def keyBy(self, f): def repartition(self, numPartitions): """ - Return a new RDD that has exactly numPartitions partitions. - - Can increase or decrease the level of parallelism in this RDD. - Internally, this uses a shuffle to redistribute data. - If you are decreasing the number of partitions in this RDD, consider - using `coalesce`, which can avoid performing a shuffle. - - >>> rdd = sc.parallelize([1,2,3,4,5,6,7], 4) - >>> sorted(rdd.glom().collect()) - [[1], [2, 3], [4, 5], [6, 7]] - >>> len(rdd.repartition(2).glom().collect()) - 2 - >>> len(rdd.repartition(10).glom().collect()) - 10 + Return a new RDD that has exactly numPartitions partitions. + + Can increase or decrease the level of parallelism in this RDD. + Internally, this uses a shuffle to redistribute data. + If you are decreasing the number of partitions in this RDD, consider + using `coalesce`, which can avoid performing a shuffle. + + >>> rdd = sc.parallelize([1,2,3,4,5,6,7], 4) + >>> sorted(rdd.glom().collect()) + [[1], [2, 3], [4, 5], [6, 7]] + >>> len(rdd.repartition(2).glom().collect()) + 2 + >>> len(rdd.repartition(10).glom().collect()) + 10 """ jrdd = self._jrdd.repartition(numPartitions) return RDD(jrdd, self.ctx, self._jrdd_deserializer)
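
Usage sketch (assumes the series applies cleanly as posted): after PATCH 2/4,
with the PATCH 3/4 fix to the optionflags default, every module's doctest
entry point reduces to the same delegation into pyspark.doctesthelper. A
minimal, self-contained sketch of that post-series pattern follows; the only
name not taken from the patches is the pip package unittest-xml-reporting,
which is assumed here to be the distribution providing the optional xmlrunner
module.

    # Sketch only -- mirrors the entry point the series installs in each
    # pyspark module (compare the conf.py and profiler.py hunks above).
    import doctest
    from pyspark.doctesthelper import run_doctests

    if __name__ == "__main__":
        # run_doctests() wraps this source file in a doctest.DocFileSuite
        # (module_relative=False, so __file__ is used as a plain path) and
        # runs it with xmlrunner.XMLTestRunner when the optional xmlrunner
        # package is importable, writing JUnit-style XML reports under
        # target/test-reports/; otherwise it falls back to
        # unittest.TextTestRunner at the same verbosity.
        result = run_doctests(__file__, optionflags=doctest.ELLIPSIS)
        if not result.wasSuccessful():
            exit(-1)

One behavioral difference worth noting about the design: doctest.DocFileSuite
treats the whole file as a single doctest document, so every >>> example in
the module runs sequentially in one shared namespace, whereas the replaced
doctest.testmod()/DocTestSuite calls ran each docstring as its own test case.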