Skip to content

Commit 8ddcb25

Browse files
Davies Liu authored and rxin committed
[SPARK-7606] [SQL] [PySpark] add version to Python SQL API docs
Add version info for public Python SQL API.

cc rxin

Author: Davies Liu <[email protected]>

Closes #6295 from davies/versions and squashes the following commits:

cfd91e6 [Davies Liu] add more version for DataFrame API
600834d [Davies Liu] add version to SQL API docs
1 parent 04940c4 commit 8ddcb25

File tree

7 files changed

+170
-18
lines changed

7 files changed

+170
-18
lines changed

python/pyspark/sql/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,13 @@
4141
"""
4242
from __future__ import absolute_import
4343

44+
45+
def since(version):
    """Return a decorator that appends a ``.. versionadded::`` note to a docstring.

    :param version: the release in which the decorated API first appeared.
        It is rendered with ``%s``, so a float such as ``1.3`` works, but a
        string is required for versions a float cannot represent
        (``"1.3.1"``, ``"1.10"``).
    :return: a decorator that rewrites ``f.__doc__`` in place and returns
        the very same object ``f`` (no wrapper is created).
    """
    def deco(f):
        # Docstrings are None when absent or when running under ``python -OO``;
        # treat that as empty instead of failing at import time.
        doc = (f.__doc__ or "").rstrip()

        # Find the common left margin of the docstring body (every non-blank
        # line after the first).  The directive must sit at that margin:
        # a column-0 line would reduce the common margin to zero and stop
        # documentation tools from dedenting the rest of the docstring.
        body = [ln for ln in doc.expandtabs().splitlines()[1:] if ln.strip()]
        margin = min(len(ln) - len(ln.lstrip()) for ln in body) if body else 0

        directive = "%s.. versionadded:: %s" % (" " * margin, version)
        f.__doc__ = (doc + "\n\n" + directive) if doc else directive
        return f
    return deco
50+
4451
# fix the module name conflict for Python 3+
4552
import sys
4653
from . import _types as types

python/pyspark/sql/column.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323

2424
from pyspark.context import SparkContext
2525
from pyspark.rdd import ignore_unicode_prefix
26+
from pyspark.sql import since
2627
from pyspark.sql.types import *
2728

2829
__all__ = ["DataFrame", "Column", "SchemaRDD", "DataFrameNaFunctions",
@@ -114,6 +115,8 @@ class Column(object):
114115
# 2. Create from an expression
115116
df.colName + 1
116117
1 / df.colName
118+
119+
.. versionadded:: 1.3
117120
"""
118121

119122
def __init__(self, jc):
@@ -159,6 +162,7 @@ def __init__(self, jc):
159162
bitwiseAND = _bin_op("bitwiseAND")
160163
bitwiseXOR = _bin_op("bitwiseXOR")
161164

165+
@since(1.3)
162166
def getItem(self, key):
163167
"""An expression that gets an item at position `ordinal` out of a list,
164168
or gets an item by key out of a dict.
@@ -179,6 +183,7 @@ def getItem(self, key):
179183
"""
180184
return self[key]
181185

186+
@since(1.3)
182187
def getField(self, name):
183188
"""An expression that gets a field by name in a StructField.
184189
@@ -211,6 +216,7 @@ def __getattr__(self, item):
211216
endswith = _bin_op("endsWith")
212217

213218
@ignore_unicode_prefix
219+
@since(1.3)
214220
def substr(self, startPos, length):
215221
"""
216222
Return a :class:`Column` which is a substring of the column
@@ -234,6 +240,7 @@ def substr(self, startPos, length):
234240
__getslice__ = substr
235241

236242
@ignore_unicode_prefix
243+
@since(1.3)
237244
def inSet(self, *cols):
238245
""" A boolean expression that is evaluated to true if the value of this
239246
expression is contained by the evaluated values of the arguments.
@@ -259,6 +266,7 @@ def inSet(self, *cols):
259266
isNull = _unary_op("isNull", "True if the current expression is null.")
260267
isNotNull = _unary_op("isNotNull", "True if the current expression is not null.")
261268

269+
@since(1.3)
262270
def alias(self, *alias):
263271
"""Returns this column aliased with a new name or names (in the case of expressions that
264272
return more than one column, such as explode).
@@ -274,6 +282,7 @@ def alias(self, *alias):
274282
return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias))))
275283

276284
@ignore_unicode_prefix
285+
@since(1.3)
277286
def cast(self, dataType):
278287
""" Convert the column into type `dataType`
279288
@@ -294,13 +303,15 @@ def cast(self, dataType):
294303
return Column(jc)
295304

296305
@ignore_unicode_prefix
306+
@since(1.3)
297307
def between(self, lowerBound, upperBound):
298308
""" A boolean expression that is evaluated to true if the value of this
299309
expression is between the given columns.
300310
"""
301311
return (self >= lowerBound) & (self <= upperBound)
302312

303313
@ignore_unicode_prefix
314+
@since(1.4)
304315
def when(self, condition, value):
305316
"""Evaluates a list of conditions and returns one of multiple possible result expressions.
306317
If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
@@ -319,6 +330,7 @@ def when(self, condition, value):
319330
return Column(jc)
320331

321332
@ignore_unicode_prefix
333+
@since(1.4)
322334
def otherwise(self, value):
323335
"""Evaluates a list of conditions and returns one of multiple possible result expressions.
324336
If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.

python/pyspark/sql/context.py

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828

2929
from pyspark.rdd import RDD, _prepare_for_python_RDD, ignore_unicode_prefix
3030
from pyspark.serializers import AutoBatchedSerializer, PickleSerializer
31+
from pyspark.sql import since
3132
from pyspark.sql.types import Row, StringType, StructType, _verify_type, \
3233
_infer_schema, _has_nulltype, _merge_type, _create_converter, _python_to_sql_converter
3334
from pyspark.sql.dataframe import DataFrame
@@ -106,11 +107,13 @@ def _ssql_ctx(self):
106107
self._scala_SQLContext = self._jvm.SQLContext(self._jsc.sc())
107108
return self._scala_SQLContext
108109

110+
@since(1.3)
109111
def setConf(self, key, value):
110112
"""Sets the given Spark SQL configuration property.
111113
"""
112114
self._ssql_ctx.setConf(key, value)
113115

116+
@since(1.3)
114117
def getConf(self, key, defaultValue):
115118
"""Returns the value of Spark SQL configuration property for the given key.
116119
@@ -119,10 +122,12 @@ def getConf(self, key, defaultValue):
119122
return self._ssql_ctx.getConf(key, defaultValue)
120123

121124
@property
125+
@since("1.3.1")
122126
def udf(self):
123127
"""Returns a :class:`UDFRegistration` for UDF registration."""
124128
return UDFRegistration(self)
125129

130+
@since(1.4)
126131
def range(self, start, end, step=1, numPartitions=None):
127132
"""
128133
Create a :class:`DataFrame` with single LongType column named `id`,
@@ -144,6 +149,7 @@ def range(self, start, end, step=1, numPartitions=None):
144149
return DataFrame(jdf, self)
145150

146151
@ignore_unicode_prefix
152+
@since(1.2)
147153
def registerFunction(self, name, f, returnType=StringType()):
148154
"""Registers a lambda function as a UDF so it can be used in SQL statements.
149155
@@ -210,7 +216,8 @@ def _inferSchema(self, rdd, samplingRatio=None):
210216

211217
@ignore_unicode_prefix
212218
def inferSchema(self, rdd, samplingRatio=None):
213-
"""::note: Deprecated in 1.3, use :func:`createDataFrame` instead.
219+
"""
220+
.. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
214221
"""
215222
warnings.warn("inferSchema is deprecated, please use createDataFrame instead")
216223

@@ -221,7 +228,8 @@ def inferSchema(self, rdd, samplingRatio=None):
221228

222229
@ignore_unicode_prefix
223230
def applySchema(self, rdd, schema):
224-
"""::note: Deprecated in 1.3, use :func:`createDataFrame` instead.
231+
"""
232+
.. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
225233
"""
226234
warnings.warn("applySchema is deprecated, please use createDataFrame instead")
227235

@@ -233,6 +241,7 @@ def applySchema(self, rdd, schema):
233241

234242
return self.createDataFrame(rdd, schema)
235243

244+
@since(1.3)
236245
@ignore_unicode_prefix
237246
def createDataFrame(self, data, schema=None, samplingRatio=None):
238247
"""
@@ -337,6 +346,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
337346
df = self._ssql_ctx.applySchemaToPythonRDD(jrdd.rdd(), schema.json())
338347
return DataFrame(df, self)
339348

349+
@since(1.3)
340350
def registerDataFrameAsTable(self, df, tableName):
341351
"""Registers the given :class:`DataFrame` as a temporary table in the catalog.
342352
@@ -349,6 +359,7 @@ def registerDataFrameAsTable(self, df, tableName):
349359
else:
350360
raise ValueError("Can only register DataFrame as table")
351361

362+
@since(1.0)
352363
def parquetFile(self, *paths):
353364
"""Loads a Parquet file, returning the result as a :class:`DataFrame`.
354365
@@ -367,6 +378,7 @@ def parquetFile(self, *paths):
367378
jdf = self._ssql_ctx.parquetFile(jpaths)
368379
return DataFrame(jdf, self)
369380

381+
@since(1.0)
370382
def jsonFile(self, path, schema=None, samplingRatio=1.0):
371383
"""Loads a text file storing one JSON object per line as a :class:`DataFrame`.
372384
@@ -407,6 +419,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
407419
return DataFrame(df, self)
408420

409421
@ignore_unicode_prefix
422+
@since(1.0)
410423
def jsonRDD(self, rdd, schema=None, samplingRatio=1.0):
411424
"""Loads an RDD storing one JSON object per string as a :class:`DataFrame`.
412425
@@ -449,6 +462,7 @@ def func(iterator):
449462
df = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype)
450463
return DataFrame(df, self)
451464

465+
@since(1.3)
452466
def load(self, path=None, source=None, schema=None, **options):
453467
"""Returns the dataset in a data source as a :class:`DataFrame`.
454468
@@ -460,6 +474,7 @@ def load(self, path=None, source=None, schema=None, **options):
460474
"""
461475
return self.read.load(path, source, schema, **options)
462476

477+
@since(1.3)
463478
def createExternalTable(self, tableName, path=None, source=None,
464479
schema=None, **options):
465480
"""Creates an external table based on the dataset in a data source.
@@ -489,6 +504,7 @@ def createExternalTable(self, tableName, path=None, source=None,
489504
return DataFrame(df, self)
490505

491506
@ignore_unicode_prefix
507+
@since(1.0)
492508
def sql(self, sqlQuery):
493509
"""Returns a :class:`DataFrame` representing the result of the given query.
494510
@@ -499,6 +515,7 @@ def sql(self, sqlQuery):
499515
"""
500516
return DataFrame(self._ssql_ctx.sql(sqlQuery), self)
501517

518+
@since(1.0)
502519
def table(self, tableName):
503520
"""Returns the specified table as a :class:`DataFrame`.
504521
@@ -510,6 +527,7 @@ def table(self, tableName):
510527
return DataFrame(self._ssql_ctx.table(tableName), self)
511528

512529
@ignore_unicode_prefix
530+
@since(1.3)
513531
def tables(self, dbName=None):
514532
"""Returns a :class:`DataFrame` containing names of tables in the given database.
515533
@@ -528,6 +546,7 @@ def tables(self, dbName=None):
528546
else:
529547
return DataFrame(self._ssql_ctx.tables(dbName), self)
530548

549+
@since(1.3)
531550
def tableNames(self, dbName=None):
532551
"""Returns a list of names of tables in the database ``dbName``.
533552
@@ -544,25 +563,29 @@ def tableNames(self, dbName=None):
544563
else:
545564
return [name for name in self._ssql_ctx.tableNames(dbName)]
546565

566+
@since(1.0)
547567
def cacheTable(self, tableName):
548568
"""Caches the specified table in-memory."""
549569
self._ssql_ctx.cacheTable(tableName)
550570

571+
@since(1.0)
551572
def uncacheTable(self, tableName):
552573
"""Removes the specified table from the in-memory cache."""
553574
self._ssql_ctx.uncacheTable(tableName)
554575

576+
@since(1.3)
555577
def clearCache(self):
556578
"""Removes all cached tables from the in-memory cache. """
557579
self._ssql_ctx.clearCache()
558580

559581
@property
582+
@since(1.4)
560583
def read(self):
561584
"""
562585
Returns a :class:`DataFrameReader` that can be used to read data
563586
in as a :class:`DataFrame`.
564587
565-
::note: Experimental
588+
.. note:: Experimental
566589
567590
>>> sqlContext.read
568591
<pyspark.sql.readwriter.DataFrameReader object at ...>

0 commit comments

Comments
 (0)