
Commit 660c6ce

[SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Author: Reynold Xin <[email protected]>

Closes apache#7079 from rxin/SPARK-8698 and squashes the following commits:

8513e1c [Reynold Xin] [SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
1 parent ac2e17b commit 660c6ce
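For context, here is a minimal, self-contained usage sketch of the writer API this commit touches, under the new partitionBy=None defaults. The Spark setup, DataFrame contents, column name, and output paths below are made up for illustration and are not part of the commit; only the writer calls mirror the changed code paths.

import os
import tempfile

from pyspark import SparkContext
from pyspark.sql import Row, SQLContext

sc = SparkContext("local[1]", "spark-8698-sketch")
sqlContext = SQLContext(sc)
df = sqlContext.createDataFrame([Row(name="a", year=2014), Row(name="b", year=2015)])

out = tempfile.mkdtemp()

# No partition columns given: save() now skips partitionBy() entirely
# instead of forwarding a default empty tuple to the JVM writer.
df.write.save(os.path.join(out, "flat"), format="parquet", mode="overwrite")

# Partitioning stays opt-in, either through the keyword argument ...
df.write.save(os.path.join(out, "by_kw"), format="parquet",
              mode="overwrite", partitionBy=["year"])

# ... or by chaining partitionBy() on the writer before writing.
df.write.partitionBy("year").mode("overwrite").parquet(os.path.join(out, "chained"))

sc.stop()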

File tree

1 file changed (+13, -8 lines)

python/pyspark/sql/readwriter.py

Lines changed: 13 additions & 8 deletions
@@ -270,12 +270,11 @@ def partitionBy(self, *cols):
         """
         if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
             cols = cols[0]
-        if len(cols) > 0:
-            self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
+        self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
         return self
 
     @since(1.4)
-    def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
+    def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
         """Saves the contents of the :class:`DataFrame` to a data source.
 
         The data source is specified by the ``format`` and a set of ``options``.
@@ -295,7 +294,9 @@ def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
 
         >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         if path is None:
@@ -315,7 +316,7 @@ def insertInto(self, tableName, overwrite=False):
         self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
 
     @since(1.4)
-    def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
+    def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
         """Saves the content of the :class:`DataFrame` as the specified table.
 
         In the case the table already exists, behavior of this function depends on the
@@ -334,7 +335,9 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
         :param partitionBy: names of partitioning columns
         :param options: all other string options
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         self._jwrite.saveAsTable(name)
@@ -356,7 +359,7 @@ def json(self, path, mode=None):
         self.mode(mode)._jwrite.json(path)
 
     @since(1.4)
-    def parquet(self, path, mode=None, partitionBy=()):
+    def parquet(self, path, mode=None, partitionBy=None):
         """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
 
         :param path: the path in any Hadoop supported file system
@@ -370,7 +373,9 @@ def parquet(self, path, mode=None, partitionBy=()):
 
         >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode)
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         self._jwrite.parquet(path)
 
     @since(1.4)
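Note that the first hunk also drops the old if len(cols) > 0 guard from partitionBy itself, so an explicit call with no columns is now forwarded to the underlying JVM writer rather than silently ignored. The following stand-alone sketch mirrors only that before/after control flow; the FakeJavaWriter class and the two helper functions are hypothetical stand-ins, not PySpark APIs.

# Hypothetical stand-ins mirroring the control flow of DataFrameWriter.partitionBy
# before and after this commit; none of these names exist in PySpark.

class FakeJavaWriter:
    """Records the column sequences the Python layer forwards to the JVM side."""

    def __init__(self):
        self.calls = []

    def partitionBy(self, cols):
        self.calls.append(tuple(cols))
        return self


def partition_by_old(jwrite, *cols):
    # Pre-commit behavior: unwrap a single list/tuple argument, then forward
    # only when at least one column was given.
    if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
        cols = cols[0]
    if len(cols) > 0:
        jwrite = jwrite.partitionBy(cols)
    return jwrite


def partition_by_new(jwrite, *cols):
    # Post-commit behavior: always forward, even an empty column list.
    if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
        cols = cols[0]
    return jwrite.partitionBy(cols)


old, new = FakeJavaWriter(), FakeJavaWriter()
partition_by_old(old)   # no columns: the JVM writer is never touched
partition_by_new(new)   # no columns: an empty sequence is still forwarded
print(old.calls)        # []
print(new.calls)        # [()]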
