Skip to content

Commit ac8307d

Browse files
MaxGekk authored and cloud-fan committed
[SPARK-34215][SQL] Keep tables cached after truncation
### What changes were proposed in this pull request? Invoke `CatalogImpl.refreshTable()` instead of combination of `SessionCatalog.refreshTable()` + `uncacheQuery()`. This allows to clear cached table data while keeping the table cached. ### Why are the changes needed? 1. To improve user experience with Spark SQL 2. To be consistent to other commands, see #31206 ### Does this PR introduce _any_ user-facing change? Yes. Before: ```scala scala> sql("CREATE TABLE tbl (c0 int)") res1: org.apache.spark.sql.DataFrame = [] scala> sql("INSERT INTO tbl SELECT 0") res2: org.apache.spark.sql.DataFrame = [] scala> sql("CACHE TABLE tbl") res3: org.apache.spark.sql.DataFrame = [] scala> sql("SELECT * FROM tbl").show(false) +---+ |c0 | +---+ |0 | +---+ scala> spark.catalog.isCached("tbl") res5: Boolean = true scala> sql("TRUNCATE TABLE tbl") res6: org.apache.spark.sql.DataFrame = [] scala> spark.catalog.isCached("tbl") res7: Boolean = false ``` After: ```scala scala> sql("TRUNCATE TABLE tbl") res6: org.apache.spark.sql.DataFrame = [] scala> spark.catalog.isCached("tbl") res7: Boolean = true ``` ### How was this patch tested? Added new test to `CachedTableSuite`: ``` $ build/sbt -Phive -Phive-thriftserver "test:testOnly *CachedTableSuite" $ build/sbt -Phive -Phive-thriftserver "test:testOnly *CatalogedDDLSuite" ``` Closes #31308 from MaxGekk/truncate-table-cached. Authored-by: Max Gekk <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent dd88eff commit ac8307d

File tree

3 files changed

+17
-10
lines changed

3 files changed

+17
-10
lines changed

docs/sql-migration-guide.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ license: |
4949
* `MSCK REPAIR TABLE`
5050
* `LOAD DATA`
5151
* `REFRESH TABLE`
52+
* `TRUNCATE TABLE`
5253
* and the method `spark.catalog.refreshTable`
5354
In Spark 3.1 and earlier, table refreshing leaves dependents uncached.
5455

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -561,16 +561,9 @@ case class TruncateTableCommand(
561561
}
562562
}
563563
}
564-
// After deleting the data, invalidate the table to make sure we don't keep around a stale
565-
// file relation in the metastore cache.
566-
spark.sessionState.refreshTable(tableName.unquotedString)
567-
// Also try to drop the contents of the table from the columnar cache
568-
try {
569-
spark.sharedState.cacheManager.uncacheQuery(spark.table(table.identifier), cascade = true)
570-
} catch {
571-
case NonFatal(e) =>
572-
log.warn(s"Exception when attempting to uncache table $tableIdentWithDB", e)
573-
}
564+
// After deleting the data, refresh the table to make sure we don't keep around a stale
565+
// file relation in the metastore cache and cached table data in the cache manager.
566+
spark.catalog.refreshTable(tableIdentWithDB)
574567

575568
if (table.stats.nonEmpty) {
576569
// empty table after truncation

sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
501501
}
502502
}
503503
}
504+
505+
test("SPARK-34215: keep table cached after truncation") {
506+
withTable("tbl") {
507+
sql("CREATE TABLE tbl (c0 int)")
508+
sql("INSERT INTO tbl SELECT 0")
509+
sql("CACHE TABLE tbl")
510+
assert(spark.catalog.isCached("tbl"))
511+
checkAnswer(sql("SELECT * FROM tbl"), Row(0))
512+
sql("TRUNCATE TABLE tbl")
513+
assert(spark.catalog.isCached("tbl"))
514+
checkAnswer(sql("SELECT * FROM tbl"), Seq.empty)
515+
}
516+
}
504517
}

0 commit comments

Comments (0)