diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 3033f147bc96..4af930a3cd56 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -232,6 +232,11 @@ def clearCache(self): """Removes all cached tables from the in-memory cache.""" self._jcatalog.clearCache() + @since(2.0) + def refreshTable(self, tableName): + """Invalidate and refresh all the cached metadata of the given table.""" + self._jcatalog.refreshTable(tableName) + def _reset(self): """(Internal use only) Drop all existing databases (except "default"), tables, partitions and functions, and set the current database to "default". diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index 6ddb1a7a1f1a..7985f2f7cdae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -214,7 +214,7 @@ abstract class Catalog { def clearCache(): Unit /** - * Invalidate and refresh all the cached the metadata of the given table. For performance reasons, + * Invalidate and refresh all the cached metadata of the given table. For performance reasons, * Spark SQL or the external data source library it uses might cache certain metadata about a * table, such as the location of blocks. When those change outside of Spark SQL, users should * call this function to invalidate the cache.