Commit 8513e1c

[SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
1 parent 0b10662 commit 8513e1c

1 file changed: 13 additions, 8 deletions

python/pyspark/sql/readwriter.py

@@ -256,12 +256,11 @@ def partitionBy(self, *cols):
         """
         if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
             cols = cols[0]
-        if len(cols) > 0:
-            self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
+        self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols))
         return self
 
     @since(1.4)
-    def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
+    def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
         """Saves the contents of the :class:`DataFrame` to a data source.
 
         The data source is specified by the ``format`` and a set of ``options``.
@@ -281,7 +280,9 @@ def save(self, path=None, format=None, mode=None, partitionBy=(), **options):
 
         >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         if path is None:
@@ -301,7 +302,7 @@ def insertInto(self, tableName, overwrite=False):
         self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
 
     @since(1.4)
-    def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
+    def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
         """Saves the content of the :class:`DataFrame` as the specified table.
 
         In the case the table already exists, behavior of this function depends on the
@@ -320,7 +321,9 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options):
         :param partitionBy: names of partitioning columns
         :param options: all other string options
         """
-        self.partitionBy(partitionBy).mode(mode).options(**options)
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         if format is not None:
             self.format(format)
         self._jwrite.saveAsTable(name)
@@ -342,7 +345,7 @@ def json(self, path, mode=None):
         self.mode(mode)._jwrite.json(path)
 
     @since(1.4)
-    def parquet(self, path, mode=None, partitionBy=()):
+    def parquet(self, path, mode=None, partitionBy=None):
         """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
 
         :param path: the path in any Hadoop supported file system
@@ -356,7 +359,9 @@ def parquet(self, path, mode=None, partitionBy=()):
 
         >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data'))
         """
-        self.partitionBy(partitionBy).mode(mode)
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
         self._jwrite.parquet(path)
 
     @since(1.4)
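
As a note on the user-facing effect (an illustration, not part of the commit): with partitionBy now defaulting to None, save/saveAsTable/parquet only forward partition columns to the underlying JVM writer when the caller actually supplies them. Below is a minimal sketch against the 1.4-era writer API, assuming a local Spark installation; the example DataFrame and output paths are hypothetical.

from pyspark import SparkContext
from pyspark.sql import Row, SQLContext

# Hypothetical local setup, for illustration only.
sc = SparkContext("local[2]", "partitionBy-default-example")
sqlContext = SQLContext(sc)
df = sqlContext.createDataFrame([Row(year=2015, month=6, value=1.0),
                                 Row(year=2015, month=7, value=2.0)])

# No partitionBy argument: with this change the JVM writer's partitionBy is
# never invoked, instead of being invoked with an empty column sequence.
df.write.mode('overwrite').parquet('/tmp/example_unpartitioned')

# Explicit partition columns behave as before and produce year=.../month=... directories.
df.write.partitionBy('year', 'month').mode('overwrite').parquet('/tmp/example_partitioned')

sc.stop()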
