@@ -124,7 +124,10 @@ def getConf(self, key, defaultValue):
     @property
     @since("1.3.1")
     def udf(self):
-        """Returns a :class:`UDFRegistration` for UDF registration."""
+        """Returns a :class:`UDFRegistration` for UDF registration.
+
+        :return: :class:`UDFRegistration`
+        """
         return UDFRegistration(self)
 
     @since(1.4)
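For orientation, a minimal sketch of how the returned registry is typically used; `strlen` is an illustrative name, and the snippet assumes a live `sqlContext` as in the doctests below:

    from pyspark.sql.types import IntegerType

    # Register a Python function under a SQL-visible name; the return
    # type defaults to StringType when omitted.
    sqlContext.udf.register("strlen", lambda s: len(s), IntegerType())
    sqlContext.sql("SELECT strlen('test')").collect()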
@@ -138,7 +141,7 @@ def range(self, start, end, step=1, numPartitions=None):
         :param end: the end value (exclusive)
         :param step: the incremental step (default: 1)
         :param numPartitions: the number of partitions of the DataFrame
-        :return: A new DataFrame
+        :return: :class:`DataFrame`
 
         >>> sqlContext.range(1, 7, 2).collect()
         [Row(id=1), Row(id=3), Row(id=5)]
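The doctest above covers basic usage; if the `numPartitions` parameter needs illustrating, a sketch under the same assumptions:

    # Generate ids 0, 2, 4, 6, 8 spread across 4 partitions.
    df = sqlContext.range(0, 10, 2, numPartitions=4)
    df.rdd.getNumPartitions()  # 4
    df.collect()               # [Row(id=0), Row(id=2), Row(id=4), Row(id=6), Row(id=8)]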
@@ -196,7 +199,7 @@ def _inferSchema(self, rdd, samplingRatio=None):
196199 "can not infer schema" )
197200 if type (first ) is dict :
198201 warnings .warn ("Using RDD of dict to inferSchema is deprecated,"
199- "please use pyspark.sql.Row instead" )
202+ "please use pyspark.sql.Row instead" , DeprecationWarning )
200203
201204 if samplingRatio is None :
202205 schema = _infer_schema (first )
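Passing `DeprecationWarning` as the category matters: CPython filters that category out by default outside of `__main__`, so callers who want to see these new messages must widen the warnings filter themselves. A minimal sketch:

    import warnings

    # Surface DeprecationWarning messages that Python hides by default.
    warnings.simplefilter("always", DeprecationWarning)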
@@ -219,7 +222,8 @@ def inferSchema(self, rdd, samplingRatio=None):
219222 """
220223 .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
221224 """
222- warnings .warn ("inferSchema is deprecated, please use createDataFrame instead" )
225+ warnings .warn (
226+ "inferSchema is deprecated, please use createDataFrame instead." , DeprecationWarning )
223227
224228 if isinstance (rdd , DataFrame ):
225229 raise TypeError ("Cannot apply schema to DataFrame" )
@@ -231,7 +235,8 @@ def applySchema(self, rdd, schema):
231235 """
232236 .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
233237 """
234- warnings .warn ("applySchema is deprecated, please use createDataFrame instead" )
238+ warnings .warn (
239+ "applySchema is deprecated, please use createDataFrame instead" , DeprecationWarning )
235240
236241 if isinstance (rdd , DataFrame ):
237242 raise TypeError ("Cannot apply schema to DataFrame" )
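Both deprecated entry points route to :func:`createDataFrame`, as the new messages say. A sketch of the replacements, assuming `rdd` is an RDD of rows and `schema` a `StructType`:

    # Instead of sqlContext.inferSchema(rdd):
    df1 = sqlContext.createDataFrame(rdd)          # schema inferred by sampling
    # Instead of sqlContext.applySchema(rdd, schema):
    df2 = sqlContext.createDataFrame(rdd, schema)  # schema applied as given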
@@ -262,6 +267,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
             :class:`list`, or :class:`pandas.DataFrame`.
         :param schema: a :class:`StructType` or list of column names. default None.
         :param samplingRatio: the sample ratio of rows used for inferring
+        :return: :class:`DataFrame`
 
         >>> l = [('Alice', 1)]
         >>> sqlContext.createDataFrame(l).collect()
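A sketch of the two schema forms the docstring names, a list of column names versus an explicit `StructType`; the column names here are illustrative:

    from pyspark.sql.types import StructType, StructField, StringType, LongType

    data = [('Alice', 1)]
    # Column names only; element types are inferred from the data.
    df1 = sqlContext.createDataFrame(data, ['name', 'age'])
    # Full StructType; no inference is needed.
    schema = StructType([StructField('name', StringType(), True),
                         StructField('age', LongType(), True)])
    df2 = sqlContext.createDataFrame(data, schema)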
@@ -359,58 +365,25 @@ def registerDataFrameAsTable(self, df, tableName):
         else:
             raise ValueError("Can only register DataFrame as table")
 
-    @since(1.0)
     def parquetFile(self, *paths):
         """Loads a Parquet file, returning the result as a :class:`DataFrame`.
 
-        >>> import tempfile, shutil
-        >>> parquetFile = tempfile.mkdtemp()
-        >>> shutil.rmtree(parquetFile)
-        >>> df.saveAsParquetFile(parquetFile)
-        >>> df2 = sqlContext.parquetFile(parquetFile)
-        >>> sorted(df.collect()) == sorted(df2.collect())
-        True
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.parquet` instead.
         """
+        warnings.warn("parquetFile is deprecated. Use read.parquet() instead.", DeprecationWarning)
         gateway = self._sc._gateway
         jpaths = gateway.new_array(gateway.jvm.java.lang.String, len(paths))
         for i in range(0, len(paths)):
             jpaths[i] = paths[i]
         jdf = self._ssql_ctx.parquetFile(jpaths)
         return DataFrame(jdf, self)
 
-    @since(1.0)
     def jsonFile(self, path, schema=None, samplingRatio=1.0):
         """Loads a text file storing one JSON object per line as a :class:`DataFrame`.
 
-        If the schema is provided, applies the given schema to this JSON dataset.
-        Otherwise, it samples the dataset with ratio ``samplingRatio`` to determine the schema.
-
-        >>> import tempfile, shutil
-        >>> jsonFile = tempfile.mkdtemp()
-        >>> shutil.rmtree(jsonFile)
-        >>> with open(jsonFile, 'w') as f:
-        ...     f.writelines(jsonStrings)
-        >>> df1 = sqlContext.jsonFile(jsonFile)
-        >>> df1.printSchema()
-        root
-         |-- field1: long (nullable = true)
-         |-- field2: string (nullable = true)
-         |-- field3: struct (nullable = true)
-         |    |-- field4: long (nullable = true)
-
-        >>> from pyspark.sql.types import *
-        >>> schema = StructType([
-        ...     StructField("field2", StringType()),
-        ...     StructField("field3",
-        ...                 StructType([StructField("field5", ArrayType(IntegerType()))]))])
-        >>> df2 = sqlContext.jsonFile(jsonFile, schema)
-        >>> df2.printSchema()
-        root
-         |-- field2: string (nullable = true)
-         |-- field3: struct (nullable = true)
-         |    |-- field5: array (nullable = true)
-         |    |    |-- element: integer (containsNull = true)
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.json` instead.
         """
+        warnings.warn("jsonFile is deprecated. Use read.json() instead.", DeprecationWarning)
         if schema is None:
             df = self._ssql_ctx.jsonFile(path, samplingRatio)
         else:
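The replacements named in the warning messages live on the new `read` property; a migration sketch with placeholder paths, and `schema` assumed to be a `StructType` built from `pyspark.sql.types`:

    # Old: sqlContext.parquetFile(path)
    df = sqlContext.read.parquet("/path/to/data.parquet")

    # Old: sqlContext.jsonFile(path) / sqlContext.jsonFile(path, schema)
    df = sqlContext.read.json("/path/to/data.json")          # schema inferred
    df = sqlContext.read.json("/path/to/data.json", schema)  # explicit schema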
@@ -462,21 +435,16 @@ def func(iterator):
             df = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype)
         return DataFrame(df, self)
 
-    @since(1.3)
     def load(self, path=None, source=None, schema=None, **options):
         """Returns the dataset in a data source as a :class:`DataFrame`.
 
-        The data source is specified by the ``source`` and a set of ``options``.
-        If ``source`` is not specified, the default data source configured by
-        ``spark.sql.sources.default`` will be used.
-
-        Optionally, a schema can be provided as the schema of the returned DataFrame.
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.load` instead.
         """
+        warnings.warn("load is deprecated. Use read.load() instead.", DeprecationWarning)
         return self.read.load(path, source, schema, **options)
 
     @since(1.3)
-    def createExternalTable(self, tableName, path=None, source=None,
-                            schema=None, **options):
+    def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
         """Creates an external table based on the dataset in a data source.
 
         It returns the DataFrame associated with the external table.
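For the generic entry point the migration is one-to-one; a sketch with a placeholder path, assuming the old `source` argument maps onto the reader's format:

    # Old: sqlContext.load(path, source="parquet")
    df = sqlContext.read.load("/path/to/data", format="parquet")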
@@ -487,6 +455,8 @@ def createExternalTable(self, tableName, path=None, source=None,
 
         Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
         created external table.
+
+        :return: :class:`DataFrame`
         """
         if path is not None:
             options["path"] = path
@@ -508,6 +478,8 @@ def createExternalTable(self, tableName, path=None, source=None,
     def sql(self, sqlQuery):
         """Returns a :class:`DataFrame` representing the result of the given query.
 
+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.sql("SELECT field1 AS f1, field2 as f2 from table1")
         >>> df2.collect()
@@ -519,6 +491,8 @@ def sql(self, sqlQuery):
     def table(self, tableName):
         """Returns the specified table as a :class:`DataFrame`.
 
+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.table("table1")
         >>> sorted(df.collect()) == sorted(df2.collect())
@@ -536,6 +510,9 @@ def tables(self, dbName=None):
         The returned DataFrame has two columns: ``tableName`` and ``isTemporary``
         (a column with :class:`BooleanType` indicating if a table is a temporary one or not).
 
+        :param dbName: string, name of the database to use.
+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.tables()
         >>> df2.filter("tableName = 'table1'").first()
@@ -550,7 +527,8 @@ def tables(self, dbName=None):
     def tableNames(self, dbName=None):
         """Returns a list of names of tables in the database ``dbName``.
 
-        If ``dbName`` is not specified, the current database will be used.
+        :param dbName: string, name of the database to use. Defaults to the current database.
+        :return: list of table names, as strings
 
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> "table1" in sqlContext.tableNames()
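One contrast worth keeping in mind: `tables()` yields a queryable :class:`DataFrame`, while `tableNames()` yields a plain Python list. A sketch, reusing the doctests' `df` and an illustrative table name:

    sqlContext.registerDataFrameAsTable(df, "people")
    # DataFrame of catalog rows, filterable like any other DataFrame.
    sqlContext.tables().filter("tableName = 'people'").first()
    # Plain list of strings.
    "people" in sqlContext.tableNames()  # True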
@@ -585,8 +563,7 @@ def read(self):
         Returns a :class:`DataFrameReader` that can be used to read data
         in as a :class:`DataFrame`.
 
-        >>> sqlContext.read
-        <pyspark.sql.readwriter.DataFrameReader object at ...>
+        :return: :class:`DataFrameReader`
         """
         return DataFrameReader(self)
 
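Dropping the repr-pinned doctest avoids coupling tests to a module path; for usage, a sketch of the reader the property returns, with a placeholder path:

    # The generic format/load chain and the format-specific shortcut
    # read the same data here.
    df = sqlContext.read.format("json").load("/path/to/data.json")
    df = sqlContext.read.json("/path/to/data.json")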