@@ -39,10 +39,10 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
assert(dbs.length == 2)
assert(dbs.map(_.name) sameElements Array(db, currentDb))
assert(dbs.map(_.catalog).distinct sameElements Array("spark_catalog"))
var databasesWithPattern = spark.catalog.listDatabases("def*").collect().sortBy(_.name)
var databasesWithPattern = spark.catalog.listDatabases("def%").collect().sortBy(_.name)
assert(databasesWithPattern.length == 1)
assert(databasesWithPattern.map(_.name) sameElements Array(currentDb))
databasesWithPattern = spark.catalog.listDatabases("def2*").collect().sortBy(_.name)
databasesWithPattern = spark.catalog.listDatabases("def2%").collect().sortBy(_.name)
assert(databasesWithPattern.length == 0)
val database = spark.catalog.getDatabase(db)
assert(database.name == db)
@@ -75,10 +75,10 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
val catalogsAfterChange = spark.catalog.listCatalogs().collect()
assert(catalogsAfterChange.length == 2)
assert(catalogsAfterChange.map(_.name).toSet == Set("testcat", "spark_catalog"))
var catalogsWithPattern = spark.catalog.listCatalogs("spark*").collect()
var catalogsWithPattern = spark.catalog.listCatalogs("spark%").collect()
assert(catalogsWithPattern.length == 1)
assert(catalogsWithPattern.map(_.name) sameElements Array("spark_catalog"))
catalogsWithPattern = spark.catalog.listCatalogs("hive*").collect()
catalogsWithPattern = spark.catalog.listCatalogs("hive%").collect()
assert(catalogsWithPattern.length == 0)
} finally {
spark.catalog.setCurrentCatalog(currentCatalog)
@@ -128,12 +128,12 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
jsonTableName))
assert(
spark.catalog
.listTables(spark.catalog.currentDatabase, "par*")
.listTables(spark.catalog.currentDatabase, "par%")
.collect()
.map(_.name)
.toSet == Set(parquetTableName))
assert(
spark.catalog.listTables(spark.catalog.currentDatabase, "txt*").collect().isEmpty)
spark.catalog.listTables(spark.catalog.currentDatabase, "txt%").collect().isEmpty)
}
assert(spark.catalog.tableExists(parquetTableName))
assert(!spark.catalog.tableExists(orcTableName))
@@ -212,11 +212,11 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
}.getMessage
assert(message.contains("UNRESOLVED_ROUTINE"))

val functionsWithPattern1 = spark.catalog.listFunctions(dbName, "to*").collect()
val functionsWithPattern1 = spark.catalog.listFunctions(dbName, "to%").collect()
assert(functionsWithPattern1.nonEmpty)
assert(functionsWithPattern1.exists(f => f.name == "to_date"))
val functionsWithPattern2 =
spark.catalog.listFunctions(dbName, "*not_existing_func*").collect()
spark.catalog.listFunctions(dbName, "%not_existing_func%").collect()
assert(functionsWithPattern2.isEmpty)
}

14 changes: 12 additions & 2 deletions docs/sql-ref-syntax-aux-show-tables.md
@@ -40,12 +40,21 @@ SHOW TABLES [ { FROM | IN } database_name ] [ LIKE regex_pattern ]

* **regex_pattern**

Specifies the regular expression pattern that is used to filter out unwanted tables.
> panbingkun (Contributor, Feb 28, 2024): [before/after screenshots of the rendered docs page]

1. After Version 4.0
   * Same as SQL `LIKE` expressions: `%` matches any character(s), and `_` matches a single character.
   * Examples are `'employees'`, `'emp%'`, and `'emplo_ees'`, all of which match the table named `'employees'`.
   * **Note**
     * The `OR` syntax represented by `|` is no longer supported by default.
     * You can restore the pre-4.0 semantics by setting `spark.sql.legacy.useVerticalBarAndStarAsWildcardsInLikePattern` to `true`, as sketched below.

1. Before Version 4.0
   * Except for the `*` and `|` characters, the pattern works like a regular expression.
   * `*` alone matches zero or more characters, and `|` separates multiple alternative regular expressions, any of which can match.
   * Leading and trailing blanks are trimmed from the input pattern before processing. The pattern match is case-insensitive.
   * Examples are `'employees'`, `'emp*'`, and `'emp*|*ees'`, all of which match the table named `'employees'`.
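
A minimal sketch of the two behaviors, assuming an active `SparkSession` named `spark` and an illustrative table `employees`:

```scala
spark.sql("CREATE TABLE employees(id INT) USING parquet")

// After 4.0 (default): SQL LIKE semantics.
spark.sql("SHOW TABLES LIKE 'emp%'").show()   // matches `employees`
spark.sql("SHOW TABLES LIKE 'emp*'").show()   // no match: '*' is now a literal character

// Restore the pre-4.0 wildcard semantics.
spark.sql("SET spark.sql.legacy.useVerticalBarAndStarAsWildcardsInLikePattern=true")
spark.sql("SHOW TABLES LIKE 'emp*'").show()   // legacy semantics: matches `employees` again
```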

### Examples

@@ -79,7 +88,7 @@ SHOW TABLES IN userdb;
+--------+---------+-----------+

-- List all tables from default database matching the pattern `sam*`
SHOW TABLES FROM default LIKE 'sam*';
SHOW TABLES FROM default LIKE 'sam%';
+--------+---------+-----------+
|database|tableName|isTemporary|
+--------+---------+-----------+
@@ -88,6 +97,7 @@ SHOW TABLES FROM default LIKE 'sam*';
+--------+---------+-----------+

-- List all tables matching the pattern `sam*|suj`
-- Note: After Version 4.0, the `OR` syntax represented by `|` is no longer supported by default.
SHOW TABLES LIKE 'sam*|suj';
+--------+---------+-----------+
|database|tableName|isTemporary|
16 changes: 8 additions & 8 deletions python/pyspark/sql/catalog.py
@@ -142,10 +142,10 @@ def listCatalogs(self, pattern: Optional[str] = None) -> List[CatalogMetadata]:
>>> spark.catalog.listCatalogs()
[CatalogMetadata(name='spark_catalog', description=None)]

>>> spark.catalog.listCatalogs("spark*")
>>> spark.catalog.listCatalogs("spark%")
[CatalogMetadata(name='spark_catalog', description=None)]

>>> spark.catalog.listCatalogs("hive*")
>>> spark.catalog.listCatalogs("hive%")
[]
"""
if pattern is None:
@@ -213,10 +213,10 @@ def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
>>> spark.catalog.listDatabases()
[Database(name='default', catalog='spark_catalog', description='default database', ...

>>> spark.catalog.listDatabases("def*")
>>> spark.catalog.listDatabases("def%")
[Database(name='default', catalog='spark_catalog', description='default database', ...

>>> spark.catalog.listDatabases("def2*")
>>> spark.catalog.listDatabases("def2%")
[]
"""
if pattern is None:
@@ -342,10 +342,10 @@ def listTables(
>>> spark.catalog.listTables()
[Table(name='test_view', catalog=None, namespace=[], description=None, ...

>>> spark.catalog.listTables(pattern="test*")
>>> spark.catalog.listTables(pattern="test%")
[Table(name='test_view', catalog=None, namespace=[], description=None, ...

>>> spark.catalog.listTables(pattern="table*")
>>> spark.catalog.listTables(pattern="table%")
[]

>>> _ = spark.catalog.dropTempView("test_view")
@@ -470,10 +470,10 @@ def listFunctions(
>>> spark.catalog.listFunctions()
[Function(name=...

>>> spark.catalog.listFunctions(pattern="to_*")
>>> spark.catalog.listFunctions(pattern="to_%")
[Function(name=...

>>> spark.catalog.listFunctions(pattern="*not_existing_func*")
>>> spark.catalog.listFunctions(pattern="%not_existing_func%")
[]
"""
if dbName is None:
16 changes: 8 additions & 8 deletions python/pyspark/sql/tests/test_catalog.py
@@ -42,9 +42,9 @@ def test_list_databases(self):
spark.sql("CREATE DATABASE some_db")
databases = [db.name for db in spark.catalog.listDatabases()]
self.assertEqual(sorted(databases), ["default", "some_db"])
databases = [db.name for db in spark.catalog.listDatabases("def*")]
databases = [db.name for db in spark.catalog.listDatabases("def%")]
self.assertEqual(sorted(databases), ["default"])
databases = [db.name for db in spark.catalog.listDatabases("def2*")]
databases = [db.name for db in spark.catalog.listDatabases("def2%")]
self.assertEqual(sorted(databases), [])

def test_database_exists(self):
@@ -94,17 +94,17 @@ def test_list_tables(self):

tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
tablesWithPattern = sorted(
spark.catalog.listTables(pattern="tab*"), key=lambda t: t.name
spark.catalog.listTables(pattern="tab%"), key=lambda t: t.name
)
tablesDefault = sorted(
spark.catalog.listTables("default"), key=lambda t: t.name
)
tablesDefaultWithPattern = sorted(
spark.catalog.listTables("default", "tab*"), key=lambda t: t.name
spark.catalog.listTables("default", "tab%"), key=lambda t: t.name
)
tablesSomeDb = sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name)
tablesSomeDbWithPattern = sorted(
spark.catalog.listTables("some_db", "tab*"), key=lambda t: t.name
spark.catalog.listTables("some_db", "tab%"), key=lambda t: t.name
)
self.assertEqual(tables, tablesDefault)
self.assertEqual(tablesWithPattern, tablesDefaultWithPattern)
@@ -265,10 +265,10 @@ def test_list_functions(self):
self.assertEqual(functions, functionsDefault)

functionsWithPattern = dict(
(f.name, f) for f in spark.catalog.listFunctions(pattern="to*")
(f.name, f) for f in spark.catalog.listFunctions(pattern="to%")
)
functionsDefaultWithPattern = dict(
(f.name, f) for f in spark.catalog.listFunctions("default", "to*")
(f.name, f) for f in spark.catalog.listFunctions("default", "to%")
)
self.assertTrue(len(functionsWithPattern) > 10)
self.assertFalse("+" in functionsWithPattern)
@@ -279,7 +279,7 @@ def test_list_functions(self):
self.assertTrue("to_unix_timestamp" in functionsWithPattern)
self.assertEqual(functionsWithPattern, functionsDefaultWithPattern)
functionsWithPattern = dict(
(f.name, f) for f in spark.catalog.listFunctions(pattern="*not_existing_func*")
(f.name, f) for f in spark.catalog.listFunctions(pattern="%not_existing_func%")
)
self.assertTrue(len(functionsWithPattern) == 0)

@@ -17,7 +17,16 @@

package org.apache.spark.sql.connector.catalog;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import scala.jdk.javaapi.CollectionConverters;

import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.catalyst.util.StringUtils;
import org.apache.spark.sql.connector.expressions.Transform;
import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
@@ -26,10 +35,6 @@
import org.apache.spark.sql.errors.QueryExecutionErrors;
import org.apache.spark.sql.types.StructType;

import java.util.Collections;
import java.util.Map;
import java.util.Set;

/**
* Catalog methods for working with Tables.
* <p>
@@ -97,6 +102,30 @@ public interface TableCatalog extends CatalogPlugin {
*/
Identifier[] listTables(String[] namespace) throws NoSuchNamespaceException;

/**
* List the tables in a namespace from the catalog by pattern string.
* <p>
* If the catalog supports views, this must return identifiers for only tables and not views.
*
* @param namespace a multi-part namespace
* @param pattern the filter pattern. When
*                'spark.sql.legacy.useVerticalBarAndStarAsWildcardsInLikePattern' is true,
*                '*' matches any character(s) and '|' separates alternative patterns; when it
*                is false, '%' matches any character(s) and '_' matches a single character.
*                Please refer to 'regex_pattern' in
*                https://spark.apache.org/docs/latest/sql-ref-syntax-aux-show-tables.html#parameters
*                for more details.
* @return an array of Identifiers for tables
* @throws NoSuchNamespaceException If the namespace does not exist (optional).
*/
default Identifier[] listTables(String[] namespace, String pattern)
throws NoSuchNamespaceException {
List<String> tableNames = Arrays.stream(listTables(namespace)).map(Identifier::name).toList();
return CollectionConverters.asJava(StringUtils.filterPattern(
CollectionConverters.asScala(tableNames).toSeq(), pattern)).stream().map(
name -> Identifier.of(namespace, name)).toArray(Identifier[]::new);
}
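
For implementations that do not override this method, a hedged sketch of what the default body above amounts to in plain Scala (the namespace and table names are illustrative stand-ins for a real `listTables(namespace)` result):

```scala
import org.apache.spark.sql.catalyst.util.StringUtils
import org.apache.spark.sql.connector.catalog.Identifier

val namespace = Array("default")
// Stand-in for what listTables(namespace) would return.
val unfiltered = Array("parquet_tab", "orc_tab", "json_tab").map(Identifier.of(namespace, _))
// Filter by table name, then rebuild identifiers in the same namespace.
val filtered = StringUtils
  .filterPattern(unfiltered.map(_.name).toSeq, "par%")
  .map(Identifier.of(namespace, _))
// filtered holds only the identifier for default.parquet_tab
```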

/**
* Load table metadata by {@link Identifier identifier} from the catalog.
* <p>
@@ -1033,7 +1033,8 @@ class SessionCatalog(
* Note that, if the specified database is global temporary view database, we will list global
* temporary views.
*/
def listTables(db: String): Seq[TableIdentifier] = listTables(db, "*")
def listTables(db: String): Seq[TableIdentifier] =
listTables(db, StringUtils.getMatchAllWildcard)

/**
* List all matching tables in the specified database, including local temporary views.
@@ -1830,7 +1831,8 @@ class SessionCatalog(
* returns the function identifier and the scope in which it was defined (system or user
* defined).
*/
def listFunctions(db: String): Seq[(FunctionIdentifier, String)] = listFunctions(db, "*")
def listFunctions(db: String): Seq[(FunctionIdentifier, String)] =
listFunctions(db, StringUtils.getMatchAllWildcard)

/**
* List all matching functions in the specified database, including temporary functions. This
@@ -1882,7 +1884,7 @@ class SessionCatalog(
dropTable(table, ignoreIfNotExists = false, purge = false)
}
// Temp functions are dropped below, we only need to drop permanent functions here.
externalCatalog.listFunctions(DEFAULT_DATABASE, "*").map { f =>
externalCatalog.listFunctions(DEFAULT_DATABASE, StringUtils.getMatchAllWildcard).map { f =>
FunctionIdentifier(f, Some(DEFAULT_DATABASE))
}.foreach(dropFunction(_, ignoreIfNotExists = false))
clearTempTables()
@@ -108,26 +108,84 @@ object StringUtils extends Logging {
// scalastyle:on caselocale

/**
* This utility can be used for filtering pattern in the "Like" of "Show Tables / Functions" DDL
* Get the wildcard that matches "all".
*/
def getMatchAllWildcard: String = {
if (SQLConf.get.legacyUseStarAndVerticalBarAsWildcardsInLikePattern) {
"*"
} else {
"%"
}
}

def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
if (SQLConf.get.legacyUseStarAndVerticalBarAsWildcardsInLikePattern) {
legacyFilterPattern(names, pattern)
} else {
filterBySQLLikePattern(names, pattern)
}
}
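
A hedged sketch of the dispatch above; the legacy flag defaults to false, so the SQL `LIKE` path is taken unless it is explicitly enabled (names are illustrative):

```scala
import org.apache.spark.sql.catalyst.util.StringUtils

val names = Seq("to_date", "to_unix_timestamp", "trim")
// Default semantics: '%' and '_' are the wildcards.
StringUtils.filterPattern(names, "to_%")  // Seq("to_date", "to_unix_timestamp")
// With spark.sql.legacy.useVerticalBarAndStarAsWildcardsInLikePattern=true the
// legacy path runs instead, where "to*" would match the same two names.
```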

/**
* This legacy utility can be used for filtering pattern in the "Like" of
* "Show Tables / Functions" DDL.
* @param names the names list to be filtered
* @param pattern the filter pattern; only '*' and '|' are allowed as wildcards, anything else
*                follows regular expression convention; the match is case-insensitive and
*                white spaces on both ends of the pattern are ignored
* @return the filtered names list, in order
*/
def filterPattern(names: Seq[String], pattern: String): Seq[String] = {
def legacyFilterPattern(names: Seq[String], pattern: String): Seq[String] = {
val funcNames = scala.collection.mutable.SortedSet.empty[String]
pattern.trim().split("\\|").foreach { subPattern =>
try {
val regex = ("(?i)" + subPattern.replaceAll("\\*", ".*")).r
funcNames ++= names.filter{ name => regex.pattern.matcher(name).matches() }
funcNames ++= names.filter { name => regex.pattern.matcher(name).matches() }
} catch {
case _: PatternSyntaxException =>
}
}
funcNames.toSeq
}

/**
* This utility can be used for filtering pattern in the "Like" of "Show Tables / Functions" DDL.
* @param names the names list to be filtered
* @param pattern the filter pattern, same as SQL type `like` expressions:
*                '%' for any character(s), and '_' for a single character
* @return the filtered names list
*/
def filterBySQLLikePattern(names: Seq[String], pattern: String): Seq[String] = {
try {
val p = Pattern.compile(likePatternToRegExp(pattern), Pattern.CASE_INSENSITIVE)
names.filter { name => p.matcher(name).matches() }
} catch {
case _: PatternSyntaxException => Seq.empty[String]
}
}

private[util] def likePatternToRegExp(pattern: String): String = {
val regExp = new StringBuilder()

var index = 0
while (index < pattern.length) {
val cur = pattern.charAt(index)
cur match {
// Make a special case for "\\_" and "\\%"
case '\\' if (index + 1 < pattern.length()
&& (pattern.charAt(index + 1) == '_' || pattern.charAt(index + 1) == '%')) =>
regExp += pattern.charAt(index + 1)
index = index + 1
case '_' => regExp ++= "."
case '%' => regExp ++= ".*?"
case _ => regExp ++= Pattern.quote(Character.toString(cur))
}
index = index + 1
}

regExp.result()
}
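
A hedged sketch of the resulting match semantics; `likePatternToRegExp` is `private[util]`, so the behavior is shown through `filterBySQLLikePattern` (names are illustrative):

```scala
import org.apache.spark.sql.catalyst.util.StringUtils

val names = Seq("employees", "employers", "100%")
StringUtils.filterBySQLLikePattern(names, "emp%")       // Seq("employees", "employers")
StringUtils.filterBySQLLikePattern(names, "emplo_ees")  // Seq("employees")
StringUtils.filterBySQLLikePattern(names, "100\\%")     // Seq("100%"): escaped '%' stays literal
```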

/**
* A string concatenator for plan strings. Uses length from a configured value, and
* prints a warning the first time a plan is truncated.