Code review feedback.

rxin · rxin · commit baa8ad54a7f5 · 2015-06-02T21:56:46.000-07:00
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
@@ -46,7 +46,7 @@
 
 def since(version):
     """
-    Annotates a function to append the version of Spark the function was added.
+    A decorator that annotates a function to append the version of Spark in which it was added.
     """
     import re
     indent_p = re.compile(r'\n( +)')
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
@@ -198,8 +198,8 @@ def _inferSchema(self, rdd, samplingRatio=None):
             raise ValueError("The first row in RDD is empty, "
                              "can not infer schema")
         if type(first) is dict:
-            warnings.warn("Using RDD of dict to inferSchema is deprecated,"
-                          "please use pyspark.sql.Row instead", DeprecationWarning)
+            warnings.warn("Using RDD of dict to inferSchema is deprecated. "
+                          "Use pyspark.sql.Row instead")
 
         if samplingRatio is None:
             schema = _infer_schema(first)
@@ -222,8 +222,7 @@ def inferSchema(self, rdd, samplingRatio=None):
         """
         .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
         """
-        warnings.warn(
-            "inferSchema is deprecated, please use createDataFrame instead.", DeprecationWarning)
+        warnings.warn("inferSchema is deprecated, please use createDataFrame instead.")
 
         if isinstance(rdd, DataFrame):
             raise TypeError("Cannot apply schema to DataFrame")
@@ -235,8 +234,7 @@ def applySchema(self, rdd, schema):
         """
         .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
         """
-        warnings.warn(
-            "applySchema is deprecated, please use createDataFrame instead", DeprecationWarning)
+        warnings.warn("applySchema is deprecated, please use createDataFrame instead")
 
         if isinstance(rdd, DataFrame):
             raise TypeError("Cannot apply schema to DataFrame")
@@ -369,8 +367,11 @@ def parquetFile(self, *paths):
         """Loads a Parquet file, returning the result as a :class:`DataFrame`.
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameReader.parquet` instead.
+
+        >>> sqlContext.parquetFile('python/test_support/sql/parquet_partitioned').dtypes
+        [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
-        warnings.warn("parquetFile is deprecated. Use read.parquet() instead.", DeprecationWarning)
+        warnings.warn("parquetFile is deprecated. Use read.parquet() instead.")
         gateway = self._sc._gateway
         jpaths = gateway.new_array(gateway.jvm.java.lang.String, len(paths))
         for i in range(0, len(paths)):
@@ -382,8 +383,11 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
         """Loads a text file storing one JSON object per line as a :class:`DataFrame`.
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameReader.json` instead.
+
+        >>> sqlContext.jsonFile('python/test_support/sql/people.json').dtypes
+        [('age', 'bigint'), ('name', 'string')]
         """
-        warnings.warn("jsonFile is deprecated. Use read.json() instead.", DeprecationWarning)
+        warnings.warn("jsonFile is deprecated. Use read.json() instead.")
         if schema is None:
             df = self._ssql_ctx.jsonFile(path, samplingRatio)
         else:
@@ -440,7 +444,7 @@ def load(self, path=None, source=None, schema=None, **options):
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameReader.load` instead.
         """
-        warnings.warn("load is deprecated. Use read.load() instead.", DeprecationWarning)
+        warnings.warn("load is deprecated. Use read.load() instead.")
         return self.read.load(path, source, schema, **options)
 
     @since(1.3)
@@ -621,10 +625,14 @@ def register(self, name, f, returnType=StringType()):
 
 
 def _test():
+    import os
     import doctest
     from pyspark.context import SparkContext
     from pyspark.sql import Row, SQLContext
     import pyspark.sql.context
+
+    os.chdir(os.environ["SPARK_HOME"])
+
     globs = pyspark.sql.context.__dict__.copy()
     sc = SparkContext('local[4]', 'PythonTest')
     globs['sc'] = sc
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
@@ -125,8 +125,7 @@ def saveAsParquetFile(self, path):
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameWriter.parquet` instead.
         """
-        warnings.warn(
-            "saveAsParquetFile is deprecated. Use write.parquet() instead.", DeprecationWarning)
+        warnings.warn("saveAsParquetFile is deprecated. Use write.parquet() instead.")
         self._jdf.saveAsParquetFile(path)
 
     @since(1.3)
@@ -147,25 +146,23 @@ def registerAsTable(self, name):
         """
         .. note:: Deprecated in 1.4, use :func:`registerTempTable` instead.
         """
-        warnings.warn("Use registerTempTable instead of registerAsTable.", DeprecationWarning)
+        warnings.warn("Use registerTempTable instead of registerAsTable.")
         self.registerTempTable(name)
 
     def insertInto(self, tableName, overwrite=False):
         """Inserts the contents of this :class:`DataFrame` into the specified table.
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameWriter.insertInto` instead.
         """
-        warnings.warn(
-            "insertInto is deprecated. Use write.insertInto() instead.", DeprecationWarning)
+        warnings.warn("insertInto is deprecated. Use write.insertInto() instead.")
         self.write.insertInto(tableName, overwrite)
 
     def saveAsTable(self, tableName, source=None, mode="error", **options):
         """Saves the contents of this :class:`DataFrame` to a data source as a table.
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameWriter.saveAsTable` instead.
         """
-        warnings.warn(
-            "insertInto is deprecated. Use write.saveAsTable() instead.", DeprecationWarning)
+        warnings.warn("insertInto is deprecated. Use write.saveAsTable() instead.")
         self.write.saveAsTable(tableName, source, mode, **options)
 
     @since(1.3)
@@ -174,8 +171,7 @@ def save(self, path=None, source=None, mode="error", **options):
 
         .. note:: Deprecated in 1.4, use :func:`DataFrameWriter.save` instead.
         """
-        warnings.warn(
-            "insertInto is deprecated. Use write.save() instead.", DeprecationWarning)
+        warnings.warn("insertInto is deprecated. Use write.save() instead.")
         return self.write.save(path, source, mode, **options)
 
     @property
@@ -629,9 +625,9 @@ def describe(self, *cols):
     def head(self, n=None):
         """Returns the first ``n`` rows.
 
-        If n is greater than 1, return a list of :class:`Row`. If n is 1, return a single Row.
-
-        :param n: int, default 1.
+        :param n: int, default 1. Number of rows to return.
+        :return: If n is greater than 1, return a list of :class:`Row`.
+            If n is 1, return a single Row.
 
         >>> df.head()
         Row(age=2, name=u'Alice')
diff --git a/python/run-tests b/python/run-tests
@@ -70,13 +70,13 @@ function run_core_tests() {
 
 function run_sql_tests() {
     echo "Run sql tests ..."
-    run_test "pyspark.sql.readwriter"
     run_test "pyspark.sql.types"
     run_test "pyspark.sql.context"
     run_test "pyspark.sql.column"
     run_test "pyspark.sql.dataframe"
     run_test "pyspark.sql.group"
     run_test "pyspark.sql.functions"
+    run_test "pyspark.sql.readwriter"
     run_test "pyspark.sql.window"
     run_test "pyspark.sql.tests"
 }
@@ -142,11 +142,11 @@ fi
 echo "Testing with Python version:"
 $PYSPARK_PYTHON --version
 
-#run_core_tests
+run_core_tests
 run_sql_tests
-#run_mllib_tests
-#run_ml_tests
-#run_streaming_tests
+run_mllib_tests
+run_ml_tests
+run_streaming_tests
 
 # Try to test with Python 3
 if [ $(which python3.4) ]; then