Skip to content

Commit 068b643

Browse files
gatorsmile
authored and committed
[SPARK-11980][SPARK-10621][SQL] Fix json_tuple and add test cases for
Added Python test cases for the functions `isnan`, `isnull`, `nanvl` and `json_tuple`. Fixed a bug in the function `json_tuple`. rxin, could you help me review my changes? Please let me know if anything is missing. Thank you! Have a good Thanksgiving day! Author: gatorsmile <[email protected]> Closes #9977 from gatorsmile/json_tuple.
1 parent d1930ec commit 068b643

File tree

1 file changed

+34
-10
lines changed

1 file changed

+34
-10
lines changed

python/pyspark/sql/functions.py

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -286,14 +286,6 @@ def countDistinct(col, *cols):
286286
return Column(jc)
287287

288288

289-
@since(1.4)
290-
def monotonicallyIncreasingId():
291-
"""
292-
.. note:: Deprecated in 1.6, use monotonically_increasing_id instead.
293-
"""
294-
return monotonically_increasing_id()
295-
296-
297289
@since(1.6)
298290
def input_file_name():
299291
"""Creates a string column for the file name of the current Spark task.
@@ -305,6 +297,10 @@ def input_file_name():
305297
@since(1.6)
306298
def isnan(col):
307299
"""An expression that returns true iff the column is NaN.
300+
301+
>>> df = sqlContext.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b"))
302+
>>> df.select(isnan("a").alias("r1"), isnan(df.a).alias("r2")).collect()
303+
[Row(r1=False, r2=False), Row(r1=True, r2=True)]
308304
"""
309305
sc = SparkContext._active_spark_context
310306
return Column(sc._jvm.functions.isnan(_to_java_column(col)))
@@ -313,11 +309,23 @@ def isnan(col):
313309
@since(1.6)
314310
def isnull(col):
315311
"""An expression that returns true iff the column is null.
312+
313+
>>> df = sqlContext.createDataFrame([(1, None), (None, 2)], ("a", "b"))
314+
>>> df.select(isnull("a").alias("r1"), isnull(df.a).alias("r2")).collect()
315+
[Row(r1=False, r2=False), Row(r1=True, r2=True)]
316316
"""
317317
sc = SparkContext._active_spark_context
318318
return Column(sc._jvm.functions.isnull(_to_java_column(col)))
319319

320320

321+
@since(1.4)
322+
def monotonicallyIncreasingId():
323+
"""
324+
.. note:: Deprecated in 1.6, use monotonically_increasing_id instead.
325+
"""
326+
return monotonically_increasing_id()
327+
328+
321329
@since(1.6)
322330
def monotonically_increasing_id():
323331
"""A column that generates monotonically increasing 64-bit integers.
@@ -344,6 +352,10 @@ def nanvl(col1, col2):
344352
"""Returns col1 if it is not NaN, or col2 if col1 is NaN.
345353
346354
Both inputs should be floating point columns (DoubleType or FloatType).
355+
356+
>>> df = sqlContext.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b"))
357+
>>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect()
358+
[Row(r1=1.0, r2=1.0), Row(r1=2.0, r2=2.0)]
347359
"""
348360
sc = SparkContext._active_spark_context
349361
return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2)))
@@ -1460,6 +1472,7 @@ def explode(col):
14601472
return Column(jc)
14611473

14621474

1475+
@ignore_unicode_prefix
14631476
@since(1.6)
14641477
def get_json_object(col, path):
14651478
"""
@@ -1468,22 +1481,33 @@ def get_json_object(col, path):
14681481
14691482
:param col: string column in json format
14701483
:param path: path to the json object to extract
1484+
1485+
>>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')]
1486+
>>> df = sqlContext.createDataFrame(data, ("key", "jstring"))
1487+
>>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \
1488+
get_json_object(df.jstring, '$.f2').alias("c1") ).collect()
1489+
[Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)]
14711490
"""
14721491
sc = SparkContext._active_spark_context
14731492
jc = sc._jvm.functions.get_json_object(_to_java_column(col), path)
14741493
return Column(jc)
14751494

14761495

1496+
@ignore_unicode_prefix
14771497
@since(1.6)
1478-
def json_tuple(col, fields):
1498+
def json_tuple(col, *fields):
14791499
"""Creates a new row for a json column according to the given field names.
14801500
14811501
:param col: string column in json format
14821502
:param fields: list of fields to extract
14831503
1504+
>>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')]
1505+
>>> df = sqlContext.createDataFrame(data, ("key", "jstring"))
1506+
>>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect()
1507+
[Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)]
14841508
"""
14851509
sc = SparkContext._active_spark_context
1486-
jc = sc._jvm.functions.json_tuple(_to_java_column(col), fields)
1510+
jc = sc._jvm.functions.json_tuple(_to_java_column(col), _to_seq(sc, fields))
14871511
return Column(jc)
14881512

14891513

0 commit comments

Comments (0)