
Commit ce00b6d

Address review comments
1 parent d216fcb commit ce00b6d

File tree

5 files changed: +17 -26 lines changed


docs/sql-data-sources-generic-options.md

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@ To load all files recursively, you can use:
`modifiedBefore` and `modifiedAfter` are options that can be
applied together or separately in order to achieve greater
granularity over which files may load during a Spark batch query.
-(Structured Streaming file source doesn't support these options.)
+(Note that Structured Streaming file sources don't support these options.)

* `modifiedBefore`: an optional timestamp to only include files with
modification times occurring before the specified time. The provided timestamp
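
Since the two options can be applied together, a minimal Scala batch-read sketch (reusing the example directory below; the timestamps are illustrative) looks like:

// Keep only files whose modification time falls between the two illustrative timestamps.
val windowedDF = spark.read.format("parquet")
  .option("modifiedAfter", "2020-06-01T05:30:00")
  .option("modifiedBefore", "2020-07-01T05:30:00")
  .load("examples/src/main/resources/dir1")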

examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala

Lines changed: 6 additions & 6 deletions
@@ -83,19 +83,19 @@ object SQLDataSourceExample {
    // $example off:load_with_path_glob_filter$
    // $example on:load_with_modified_time_filter$
    val beforeFilterDF = spark.read.format("parquet")
-        // Files modified before 07/01/2020 at 05:30 are allowed
-        .option("modifiedBefore", "2020-07-01T05:30:00")
-        .load("examples/src/main/resources/dir1");
+      // Files modified before 07/01/2020 at 05:30 are allowed
+      .option("modifiedBefore", "2020-07-01T05:30:00")
+      .load("examples/src/main/resources/dir1");
    beforeFilterDF.show();
    // +-------------+
    // |         file|
    // +-------------+
    // |file1.parquet|
    // +-------------+
    val afterFilterDF = spark.read.format("parquet")
-        // Files modified after 06/01/2020 at 05:30 are allowed
-        .option("modifiedAfter", "2020-06-01T05:30:00")
-        .load("examples/src/main/resources/dir1");
+      // Files modified after 06/01/2020 at 05:30 are allowed
+      .option("modifiedAfter", "2020-06-01T05:30:00")
+      .load("examples/src/main/resources/dir1");
    afterFilterDF.show();
    // +-------------+
    // |         file|

python/pyspark/sql/readwriter.py

Lines changed: 8 additions & 17 deletions
@@ -832,8 +832,7 @@ def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPar
        """
        if properties is None:
            properties = dict()
-        jprop = JavaClass("java.util.Properties",
-                          self._spark._sc._gateway._gateway_client)()
+        jprop = JavaClass("java.util.Properties", self._spark._sc._gateway._gateway_client)()
        for k in properties:
            jprop.setProperty(k, properties[k])
        if column is not None:
@@ -845,8 +844,7 @@ def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPar
                                               int(numPartitions), jprop))
        if predicates is not None:
            gateway = self._spark._sc._gateway
-            jpredicates = utils.toJArray(
-                gateway, gateway.jvm.java.lang.String, predicates)
+            jpredicates = utils.toJArray(gateway, gateway.jvm.java.lang.String, predicates)
            return self._df(self._jreader.jdbc(url, table, jpredicates, jprop))
        return self._df(self._jreader.jdbc(url, table, jprop))
@@ -859,7 +857,6 @@ class DataFrameWriter(OptionUtils):

    .. versionadded:: 1.4
    """
-
    def __init__(self, df):
        self._df = df
        self._spark = df.sql_ctx
@@ -1001,21 +998,18 @@ def bucketBy(self, numBuckets, col, *cols):
        ...     .saveAsTable('bucketed_table'))
        """
        if not isinstance(numBuckets, int):
-            raise TypeError(
-                "numBuckets should be an int, got {0}.".format(type(numBuckets)))
+            raise TypeError("numBuckets should be an int, got {0}.".format(type(numBuckets)))

        if isinstance(col, (list, tuple)):
            if cols:
-                raise ValueError(
-                    "col is a {0} but cols are not empty".format(type(col)))
+                raise ValueError("col is a {0} but cols are not empty".format(type(col)))

            col, cols = col[0], col[1:]

        if not all(isinstance(c, str) for c in cols) or not(isinstance(col, str)):
            raise TypeError("all names should be `str`")

-        self._jwrite = self._jwrite.bucketBy(
-            numBuckets, col, _to_seq(self._spark._sc, cols))
+        self._jwrite = self._jwrite.bucketBy(numBuckets, col, _to_seq(self._spark._sc, cols))
        return self

    def sortBy(self, col, *cols):
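
For context, the `bucketBy` wrapper above ultimately delegates to the JVM `DataFrameWriter`; a minimal Scala sketch of the equivalent call (bucket count, column names, and table name are illustrative) is:

// Write df bucketed into 4 buckets by "id", sorted by "ts" within each bucket.
// Bucketed writes go through saveAsTable rather than a plain save().
df.write
  .bucketBy(4, "id")
  .sortBy("ts")
  .saveAsTable("bucketed_table")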
@@ -1040,8 +1034,7 @@ def sortBy(self, col, *cols):
        """
        if isinstance(col, (list, tuple)):
            if cols:
-                raise ValueError(
-                    "col is a {0} but cols are not empty".format(type(col)))
+                raise ValueError("col is a {0} but cols are not empty".format(type(col)))

            col, cols = col[0], col[1:]
@@ -1423,8 +1416,7 @@ def jdbc(self, url, table, mode=None, properties=None):
        """
        if properties is None:
            properties = dict()
-        jprop = JavaClass("java.util.Properties",
-                          self._spark._sc._gateway._gateway_client)()
+        jprop = JavaClass("java.util.Properties", self._spark._sc._gateway._gateway_client)()
        for k in properties:
            jprop.setProperty(k, properties[k])
        self.mode(mode)._jwrite.jdbc(url, table, jprop)
@@ -1590,8 +1582,7 @@ def _test():
    globs['os'] = os
    globs['sc'] = sc
    globs['spark'] = spark
-    globs['df'] = spark.read.parquet(
-        'python/test_support/sql/parquet_partitioned')
+    globs['df'] = spark.read.parquet('python/test_support/sql/parquet_partitioned')
    (failure_count, test_count) = doctest.testmod(
        pyspark.sql.readwriter, globs=globs,
        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
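
The `jdbc` hunks above only re-wrap how the `java.util.Properties` object is built through Py4J; a minimal Scala sketch of the call it mirrors (URL, table, and credentials are placeholders) is:

import java.util.Properties

// Placeholder connection settings; this Properties object corresponds to the jprop
// built in the Python wrapper via JavaClass("java.util.Properties", ...).
val connectionProperties = new Properties()
connectionProperties.setProperty("user", "username")
connectionProperties.setProperty("password", "password")
val jdbcDF = spark.read.jdbc("jdbc:postgresql://localhost:5432/mydb", "public.mytable", connectionProperties)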

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ abstract class PartitioningAwareFileIndex(
  protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]]

  private val caseInsensitiveMap = CaseInsensitiveMap(parameters)
-  protected val pathFilters = PathFilterFactory.create(caseInsensitiveMap)
+  private val pathFilters = PathFilterFactory.create(caseInsensitiveMap)

  protected def matchPathPattern(file: FileStatus): Boolean =
    pathFilters.forall(_.accept(file))
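
For context, the filters produced by `PathFilterFactory` are applied per file in `matchPathPattern`; a minimal sketch of a modification-time check over Hadoop `FileStatus` objects (illustrative only, not the actual filter implementation) could look like:

import org.apache.hadoop.fs.FileStatus

// Illustrative stand-in for a "modifiedBefore"-style filter: keep files whose
// modification time (epoch millis) is earlier than the given threshold.
def modifiedBefore(thresholdMillis: Long)(file: FileStatus): Boolean =
  file.getModificationTime < thresholdMillis

// Usage sketch: leafFiles.filter(modifiedBefore(thresholdMillis))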

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging
  private def checkDisallowedOptions(options: Map[String, String]): Unit = {
    Seq(ModifiedBeforeFilter.PARAM_NAME, ModifiedAfterFilter.PARAM_NAME).foreach { param =>
      if (parameters.contains(param)) {
-        throw new IllegalArgumentException(s"option '$param' is not allowed in file stream source")
+        throw new IllegalArgumentException(s"option '$param' is not allowed in file stream sources")
      }
    }
  }
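
As the message says, a streaming file source rejects these options outright; a minimal Scala sketch of what that looks like from the user side (path and schema are illustrative) is:

import org.apache.spark.sql.types.{StringType, StructType}

// Illustrative schema and path; this read is expected to fail with
// IllegalArgumentException: option 'modifiedAfter' is not allowed in file stream sources
val schema = new StructType().add("file", StringType)
val streamDF = spark.readStream
  .format("parquet")
  .schema(schema)
  .option("modifiedAfter", "2020-06-01T05:30:00")
  .load("examples/src/main/resources/dir1")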
