diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 64de33e8ec0a..5bd37f8d4e7e 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -20,6 +20,7 @@
 if sys.version >= '3':
     basestring = unicode = str
 
+import logging
 from py4j.java_gateway import JavaClass
 
 from pyspark import RDD, since, keyword_only
@@ -370,7 +371,7 @@ def orc(self, path):
 
         >>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
         >>> df.dtypes
-        [('a', 'bigint'), ('b', 'int'), ('c', 'int')]
+        [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
         return self._df(self._jreader.orc(path))
 
@@ -501,6 +502,46 @@ def partitionBy(self, *cols):
         self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols))
         return self
 
+    @since(2.1)
+    def bucketBy(self, numBuckets, *cols):
+        """Buckets the output by the given columns on the file system.
+
+        :param numBuckets: the number of buckets to save
+        :param cols: name of columns, as separate arguments or one list/tuple
+        :raises ValueError: if no column name is given
+
+        >>> (df.write.format('parquet')
+        ...     .bucketBy(100, 'year', 'month')
+        ...     .saveAsTable('bucketed_data'))
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
+            cols = cols[0]
+        if not cols:
+            raise ValueError("bucketBy requires at least one column name")
+        # The JVM call takes the first name separately from the remaining ones.
+        self._jwrite = self._jwrite.bucketBy(
+            numBuckets, cols[0], _to_seq(self._spark._sc, cols[1:]))
+        return self
+
+    @since(2.1)
+    def sortBy(self, *cols):
+        """Sorts the output in each bucket by the given columns on the file system.
+
+        :param cols: name of columns, as separate arguments or one list/tuple
+        :raises ValueError: if no column name is given
+
+        >>> (df.write.format('parquet')
+        ...     .bucketBy(100, 'year', 'month')
+        ...     .sortBy('day')
+        ...     .saveAsTable('sorted_data'))
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
+            cols = cols[0]
+        if not cols:
+            raise ValueError("sortBy requires at least one column name")
+        self._jwrite = self._jwrite.sortBy(cols[0], _to_seq(self._spark._sc, cols[1:]))
+        return self
+
     @since(1.4)
     def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
         """Saves the contents of the :class:`DataFrame` to a data source.
@@ -562,6 +603,8 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options)
         :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error)
         :param partitionBy: names of partitioning columns
         :param options: all other string options
+
+        >>> df.write.saveAsTable('my_table')
         """
         self.mode(mode).options(**options)
         if partitionBy is not None:
@@ -693,8 +736,7 @@ def orc(self, path, mode=None, partitionBy=None, compression=None):
                             This will override ``orc.compress``. If None is set, it uses the
                             default value, ``snappy``.
 
-        >>> orc_df = spark.read.orc('python/test_support/sql/orc_partitioned')
-        >>> orc_df.write.orc(os.path.join(tempfile.mkdtemp(), 'data'))
+        >>> df.write.orc(os.path.join(tempfile.mkdtemp(), 'data'))
         """
         self.mode(mode)
         if partitionBy is not None:
@@ -734,11 +776,22 @@ def _test():
     import os
     import tempfile
     import py4j
+    import shutil
+    from random import Random
+    from time import time
     from pyspark.context import SparkContext
     from pyspark.sql import SparkSession, Row
     import pyspark.sql.readwriter
 
-    os.chdir(os.environ["SPARK_HOME"])
+    spark_home = os.path.realpath(os.environ["SPARK_HOME"])
+
+    test_dir = tempfile.mkdtemp()
+    os.chdir(test_dir)
+
+    # Copy only the fixtures, keeping the relative layout the doctests read from.
+    test_support = os.path.join('python', 'test_support')
+    shutil.copytree(os.path.join(spark_home, test_support),
+                    os.path.join(test_dir, test_support))
 
     globs = pyspark.sql.readwriter.__dict__.copy()
     sc = SparkContext('local[4]', 'PythonTest')
@@ -747,16 +800,25 @@ def _test():
     except py4j.protocol.Py4JError:
         spark = SparkSession(sc)
 
+    seed = int(time() * 1000)
+    rng = Random(seed)
+
+    base_df_format = rng.choice(('orc', 'parquet'))
+    loader = getattr(spark.read, base_df_format)
+    path = os.path.join(test_dir, 'python/test_support/sql/%s_partitioned' % base_df_format)
+    df = loader(path)
+
     globs['tempfile'] = tempfile
     globs['os'] = os
     globs['sc'] = sc
     globs['spark'] = spark
-    globs['df'] = spark.read.parquet('python/test_support/sql/parquet_partitioned')
+    globs['df'] = df
     (failure_count, test_count) = doctest.testmod(
         pyspark.sql.readwriter, globs=globs,
         optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
     sc.stop()
     if failure_count:
+        logging.error('Random seed for test: %d', seed)
         exit(-1)
 
 
diff --git a/python/test_support/sql/orc_partitioned/._SUCCESS.crc b/python/test_support/sql/orc_partitioned/._SUCCESS.crc
new file mode 100644
index 000000000000..3b7b044936a8
Binary files /dev/null and b/python/test_support/sql/orc_partitioned/._SUCCESS.crc differ
diff --git a/python/test_support/sql/orc_partitioned/_SUCCESS b/python/test_support/sql/orc_partitioned/_SUCCESS
old mode 100755
new mode 100644
diff --git a/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc b/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc
deleted file mode 100644
index 834cf0b7f227..000000000000
Binary files a/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc and /dev/null differ
diff --git a/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc b/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc
deleted file mode 100755
index 494380187335..000000000000
Binary files a/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc and /dev/null differ
diff --git a/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc b/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc
deleted file mode 100644
index 693dceeee3ef..000000000000
Binary files a/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc and /dev/null differ
diff --git
a/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc b/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc deleted file mode 100755 index 4cbb95ae0242..000000000000 Binary files a/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc and /dev/null differ diff --git a/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/.part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc b/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/.part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc new file mode 100644 index 000000000000..100b553f3e6f Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/.part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc differ diff --git a/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc b/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc new file mode 100644 index 000000000000..41bde3f51011 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2014/month=9/day=1/part-r-00004-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc new file mode 100644 index 000000000000..618495c42cf9 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc differ diff --git 
a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc new file mode 100644 index 000000000000..b97ecd99ae93 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/.part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc new file mode 100644 index 000000000000..32240cd6c484 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00000-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc new file mode 100644 index 000000000000..522eca6feb5e Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=25/part-r-00001-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/.part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/.part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc new file mode 100644 index 000000000000..457bbd61c0f0 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/.part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc differ diff --git 
a/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc new file mode 100644 index 000000000000..aad339a676af Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=10/day=26/part-r-00003-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/.part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc b/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/.part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc new file mode 100644 index 000000000000..0e2af0d35f99 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/.part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc.crc differ diff --git a/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc b/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc new file mode 100644 index 000000000000..81a46c351e26 Binary files /dev/null and b/python/test_support/sql/orc_partitioned/year=2015/month=9/day=1/part-r-00002-d421fe32-be16-4ee1-9be1-b2560a375fec.snappy.orc differ diff --git a/python/test_support/sql/parquet_partitioned/._SUCCESS.crc b/python/test_support/sql/parquet_partitioned/._SUCCESS.crc new file mode 100644 index 000000000000..3b7b044936a8 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/._SUCCESS.crc differ diff --git a/python/test_support/sql/parquet_partitioned/_common_metadata b/python/test_support/sql/parquet_partitioned/_common_metadata deleted file mode 100644 index 7ef2320651de..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/_common_metadata 
and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/_metadata b/python/test_support/sql/parquet_partitioned/_metadata deleted file mode 100644 index 78a1ca7d3827..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/_metadata and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc new file mode 100644 index 000000000000..6810b1b7906b Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc differ diff --git a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc deleted file mode 100644 index e93f42ed6f35..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet new file mode 100644 index 000000000000..ad3934acd288 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00004-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet differ diff --git a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet b/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet deleted file mode 100644 index 461c382937ec..000000000000 Binary files 
a/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc new file mode 100644 index 000000000000..fc01a8de423a Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc new file mode 100644 index 000000000000..54c1cc0eb46f Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc deleted file mode 100644 index b63c4d6d1e1d..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc deleted file mode 100644 index 5bc0ebd71356..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc and /dev/null differ diff --git 
a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet new file mode 100644 index 000000000000..f11884e80828 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00000-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet new file mode 100644 index 000000000000..896b89a38fe8 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00001-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet deleted file mode 100644 index 62a63915beac..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet deleted file mode 100644 index 67665a7b55da..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc 
b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc new file mode 100644 index 000000000000..f9f3e8d0fea6 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc deleted file mode 100644 index ae94a15d08c8..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet new file mode 100644 index 000000000000..f2745fb72aa6 Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00003-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet deleted file mode 100644 index 6cb8538aa890..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc new file mode 100644 index 
000000000000..879e623a54cc Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet.crc differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc deleted file mode 100644 index 58d9bb5fc588..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc and /dev/null differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet new file mode 100644 index 000000000000..ed75c907763d Binary files /dev/null and b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00002-49e02526-4b87-4d0d-b081-09a3374f8cf4.snappy.parquet differ diff --git a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet b/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet deleted file mode 100644 index 9b00805481e7..000000000000 Binary files a/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet and /dev/null differ