Commit 6bd09b1

fix
1 parent d0e9219 commit 6bd09b1

5 files changed: +29 −18 lines changed

docs/sql-data-sources-parquet.md

Lines changed: 0 additions & 12 deletions
@@ -295,18 +295,6 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession
     </p>
   </td>
 </tr>
-<tr>
-  <td><code>spark.sql.optimizer.metadataOnly</code></td>
-  <td>true</td>
-  <td>
-    <p>
-      When true, enable the metadata-only query optimization that use the table's metadata to
-      produce the partition columns instead of table scans. It applies when all the columns scanned
-      are partition columns and the query has an aggregate operator that satisfies distinct
-      semantics.
-    </p>
-  </td>
-</tr>
 <tr>
   <td><code>spark.sql.parquet.writeLegacyFormat</code></td>
   <td>false</td>
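
For context: the removed row documents when the metadata-only optimization can kick in, namely queries that scan only partition columns and aggregate them with distinct semantics. A minimal Scala sketch of such a query, assuming a local SparkSession and an illustrative partitioned table named `events` (names are not from this commit):

import org.apache.spark.sql.SparkSession

// Illustrative sketch: build a small partitioned Parquet table.
val spark = SparkSession.builder().master("local[*]").appName("metadata-only-sketch").getOrCreate()
spark.sql("CREATE TABLE events (id INT, dt STRING) USING parquet PARTITIONED BY (dt)")
spark.sql("INSERT INTO events PARTITION (dt = '2019-01-01') VALUES (1)")

// Both queries touch only the partition column `dt`, so with
// spark.sql.optimizer.metadataOnly=true they can be answered from the
// catalog's partition metadata instead of scanning data files.
spark.sql("SELECT DISTINCT dt FROM events").show()
spark.sql("SELECT MAX(dt) FROM events").show()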

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 3 additions & 2 deletions
@@ -585,9 +585,10 @@ object SQLConf {
     .doc("When true, enable the metadata-only query optimization that use the table's metadata " +
       "to produce the partition columns instead of table scans. It applies when all the columns " +
       "scanned are partition columns and the query has an aggregate operator that satisfies " +
-      "distinct semantics.")
+      "distinct semantics. By default the optimization is disabled, since it may return " +
+      "incorrect results with empty tables.")
     .booleanConf
-    .createWithDefault(true)
+    .createWithDefault(false)
 
   val COLUMN_NAME_OF_CORRUPT_RECORD = buildConf("spark.sql.columnNameOfCorruptRecord")
     .doc("The name of internal column for storing raw/un-parsed JSON and CSV records that fail " +

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 12 additions & 1 deletion
@@ -2422,7 +2422,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       Row(s"$expected") :: Nil)
   }
 
-  test("SPARK-15752 optimize metadata only query for datasource table") {
+  ignore("SPARK-15752 optimize metadata only query for datasource table") {
     withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") {
       withTable("srcpart_15752") {
         val data = (1 to 10).map(i => (i, s"data-$i", i % 2, if ((i % 2) == 0) "a" else "b"))
@@ -2966,6 +2966,17 @@
       }
     }
   }
+
+  test("SPARK-26709: OptimizeMetadataOnlyQuery does not handle empty records correctly") {
+    withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "false") {
+      withTable("t") {
+        sql("CREATE TABLE t (col1 INT, p1 INT) USING PARQUET PARTITIONED BY (p1)")
+        sql("INSERT INTO TABLE t PARTITION (p1 = 5) SELECT ID FROM range(1, 1)")
+        checkAnswer(sql("SELECT MAX(p1) FROM t"), Row(null))
+        checkAnswer(sql("SELECT MAX(col1) FROM t"), Row(null))
+      }
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
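
A sketch of the failure mode this test guards against (illustrative, not part of the commit): the INSERT below writes zero rows, yet the static partition p1 = 5 is still registered in the catalog, so a metadata-only plan can answer the aggregate from partition metadata and report 5 where the correct answer is NULL. Assumes a SparkSession named `spark`; table and column names mirror the test above.

// The behaviour being disabled by default in this commit.
spark.sql("SET spark.sql.optimizer.metadataOnly=true")
spark.sql("CREATE TABLE t (col1 INT, p1 INT) USING PARQUET PARTITIONED BY (p1)")
spark.sql("INSERT INTO TABLE t PARTITION (p1 = 5) SELECT ID FROM range(1, 1)")  // range(1, 1) is empty
// With the optimization on, MAX(p1) may come from partition metadata and
// return 5 even though the table holds no rows; NULL is the correct result.
spark.sql("SELECT MAX(p1) FROM t").show()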

sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   private def testMetadataOnly(name: String, sqls: String*): Unit = {
-    test(name) {
+    ignore(name) {
       withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") {
         sqls.foreach { case q => assertMetadataOnlyQuery(sql(q)) }
       }
@@ -69,7 +69,7 @@
   }
 
   private def testNotMetadataOnly(name: String, sqls: String*): Unit = {
-    test(name) {
+    ignore(name) {
       withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") {
         sqls.foreach { case q => assertNotMetadataOnlyQuery(sql(q)) }
       }

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 12 additions & 1 deletion
@@ -86,6 +86,17 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     assert(message.contains("Table or view not found"))
   }
 
+  test("SPARK-26709: OptimizeMetadataOnlyQuery does not handle empty records correctly") {
+    withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "false") {
+      withTable("t") {
+        sql("CREATE TABLE t (col1 INT, p1 INT) USING PARQUET PARTITIONED BY (p1)")
+        sql("INSERT INTO TABLE t PARTITION (p1 = 5) SELECT ID FROM range(1, 1)")
+        checkAnswer(sql("SELECT MAX(p1) FROM t"), Row(null))
+        checkAnswer(sql("SELECT MAX(col1) FROM t"), Row(null))
+      }
+    }
+  }
+
   test("script") {
     assume(TestUtils.testCommandAvailable("/bin/bash"))
     assume(TestUtils.testCommandAvailable("echo | sed"))
@@ -1770,7 +1781,7 @@
     }
   }
 
-  test("SPARK-15752 optimize metadata only query for hive table") {
+  ignore("SPARK-15752 optimize metadata only query for hive table") {
     withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") {
       withTable("data_15752", "srcpart_15752", "srctext_15752") {
         val df = Seq((1, "2"), (3, "4")).toDF("key", "value")
