[SPARK-27919][SQL] Add v2 session catalog #24768
org.apache.spark.sql.catalog.v2.LookupCatalog:

@@ -17,54 +17,127 @@
package org.apache.spark.sql.catalog.v2

import scala.util.control.NonFatal

import org.apache.spark.annotation.Experimental
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.TableIdentifier

/**
 * A trait to encapsulate catalog lookup function and helpful extractors.
 */
@Experimental
-trait LookupCatalog {
trait LookupCatalog extends Logging {

  import LookupCatalog._

  protected def defaultCatalogName: Option[String] = None
  protected def lookupCatalog(name: String): CatalogPlugin

-  type CatalogObjectIdentifier = (Option[CatalogPlugin], Identifier)
  /**
   * Returns the default catalog. When set, this catalog is used for all identifiers that do not
   * set a specific catalog. When this is None, the session catalog is responsible for the
   * identifier.
   *
   * If this is None and a table's provider (source) is a v2 provider, the v2 session catalog will
   * be used.
   */
  def defaultCatalog: Option[CatalogPlugin] = {
    try {
      defaultCatalogName.map(lookupCatalog)
    } catch {
      case NonFatal(e) =>
        logError(s"Cannot load default v2 catalog: ${defaultCatalogName.get}", e)
        None
    }
  }
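As a point of reference, here is a minimal sketch of how this trait could be wired up, assuming the v2 classes defined in this package (`CatalogPlugin`, `CatalogNotFoundException`) are on the classpath. `TestCatalogPlugin`, `DemoLookup`, and the catalog name `prod` are made up for illustration and are not part of this change:

```scala
import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog}
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Illustrative plugin that does nothing besides carry its name.
case class TestCatalogPlugin(override val name: String) extends CatalogPlugin {
  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = ()
}

object DemoLookup extends LookupCatalog {
  private val catalogs = Map("prod" -> TestCatalogPlugin("prod"))

  // Identifiers that do not name a catalog fall back to this catalog when it is set.
  override protected def defaultCatalogName: Option[String] = Some("prod")

  override protected def lookupCatalog(name: String): CatalogPlugin =
    catalogs.getOrElse(name, throw new CatalogNotFoundException(s"$name is not defined"))
}

// DemoLookup.defaultCatalog resolves "prod"; if the lookup threw instead, the error
// would be logged and defaultCatalog would return None rather than fail the caller.
```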
  /**
-   * Extract catalog plugin and identifier from a multi-part identifier.
   * This catalog is a v2 catalog that delegates to the v1 session catalog. It is used when the
   * session catalog is responsible for an identifier, but the source requires the v2 catalog API.
   * This happens when the source implementation extends the v2 TableProvider API and is not listed
   * in the fallback configuration, spark.sql.sources.write.useV1SourceList.
   */
-  object CatalogObjectIdentifier {
-    def unapply(parts: Seq[String]): Some[CatalogObjectIdentifier] = parts match {
-      case Seq(name) =>
-        Some((None, Identifier.of(Array.empty, name)))
  def sessionCatalog: Option[CatalogPlugin] = {
    try {
      Some(lookupCatalog(SESSION_CATALOG_NAME))
    } catch {
      case NonFatal(e) =>
        logError("Cannot load v2 session catalog", e)
        None
    }
  }
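The fallback list mentioned in the comment above is an ordinary SQL conf, so routing between the v1 write path and the v2 session catalog can be controlled per session. A sketch, with placeholder source names:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("demo").master("local[*]").getOrCreate()

// Sources named here keep the v1 write path even if their provider implements the v2
// TableProvider API; providers not listed are handled through the v2 session catalog.
spark.conf.set("spark.sql.sources.write.useV1SourceList", "csv,json,orc")
```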
  /**
   * Extract catalog plugin and remaining identifier names.
   *
   * This does not substitute the default catalog if no catalog is set in the identifier.
   */
  private object CatalogAndIdentifier {
    def unapply(parts: Seq[String]): Some[(Option[CatalogPlugin], Seq[String])] = parts match {
      case Seq(_) =>
        Some((None, parts))
      case Seq(catalogName, tail @ _*) =>
        try {
-          Some((Some(lookupCatalog(catalogName)), Identifier.of(tail.init.toArray, tail.last)))
          Some((Some(lookupCatalog(catalogName)), tail))
        } catch {
          case _: CatalogNotFoundException =>
-            Some((None, Identifier.of(parts.init.toArray, parts.last)))
            Some((None, parts))
        }
    }
  }

  type CatalogObjectIdentifier = (Option[CatalogPlugin], Identifier)

  /**
   * Extract catalog and identifier from a multi-part identifier with the default catalog if needed.
   */
  object CatalogObjectIdentifier {
    def unapply(parts: Seq[String]): Some[CatalogObjectIdentifier] = parts match {
      case CatalogAndIdentifier(maybeCatalog, nameParts) =>
        Some((
          maybeCatalog.orElse(defaultCatalog),
          Identifier.of(nameParts.init.toArray, nameParts.last)
        ))
    }
  }
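To make the extractor behavior concrete, here is a hypothetical REPL-style walkthrough using the `DemoLookup` sketch from above; the catalog, namespace, and table names are placeholders:

```scala
import DemoLookup._

// Catalog-qualified: the first part resolves to a registered catalog, the rest is the identifier.
Seq("prod", "db", "tbl") match {
  case CatalogObjectIdentifier(catalog, ident) =>
    // catalog == Some(TestCatalogPlugin("prod")), ident == Identifier.of(Array("db"), "tbl")
}

// Unqualified: "db" is not a registered catalog, so the default catalog is substituted.
Seq("db", "tbl") match {
  case CatalogObjectIdentifier(catalog, ident) =>
    // catalog == DemoLookup.defaultCatalog, ident == Identifier.of(Array("db"), "tbl")
}
```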
  /**
   * Extract legacy table identifier from a multi-part identifier.
   *
   * For legacy support only. Please use [[CatalogObjectIdentifier]] instead on DSv2 code paths.
   */
  object AsTableIdentifier {
    def unapply(parts: Seq[String]): Option[TableIdentifier] = parts match {
-      case CatalogObjectIdentifier(None, ident) =>
-        ident.namespace match {
-          case Array() =>
-            Some(TableIdentifier(ident.name))
-          case Array(database) =>
-            Some(TableIdentifier(ident.name, Some(database)))
      case CatalogAndIdentifier(None, names) if defaultCatalog.isEmpty =>
        names match {
          case Seq(name) =>
            Some(TableIdentifier(name))
          case Seq(database, name) =>
            Some(TableIdentifier(name, Some(database)))
          case _ =>
            None
        }
      case _ =>
        None
    }
  }

  /**
   * For temp views, extract a table identifier from a multi-part identifier if it has no catalog.
   */
  object AsTemporaryViewIdentifier {
    def unapply(parts: Seq[String]): Option[TableIdentifier] = parts match {
      case CatalogAndIdentifier(None, Seq(table)) =>
        Some(TableIdentifier(table))
      case CatalogAndIdentifier(None, Seq(database, table)) =>
        Some(TableIdentifier(table, Some(database)))
      case _ =>
        None
    }
  }
}
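The two legacy extractors only fire when no catalog (and, for `AsTableIdentifier`, no default catalog) is involved. A hypothetical example with a lookup that has no catalogs registered; the names are placeholders:

```scala
import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog}

// No catalogs registered and no default catalog, so legacy extraction applies.
object LegacyLookup extends LookupCatalog {
  override protected def lookupCatalog(name: String): CatalogPlugin =
    throw new CatalogNotFoundException(s"$name is not defined")
}
import LegacyLookup._

Seq("v") match {
  case AsTemporaryViewIdentifier(ident) => // ident == TableIdentifier("v")
  case _ =>
}

Seq("db", "t") match {
  case AsTableIdentifier(ident) => // ident == TableIdentifier("t", Some("db"))
  case _ =>                        // would not match if a catalog or default catalog applied
}
```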
object LookupCatalog {
  val SESSION_CATALOG_NAME: String = "session"
}
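The reserved name is how callers ask for the v2 adapter over the built-in session catalog, rather than for a configured default catalog. Continuing the `DemoLookup` sketch (which registers no catalog under that name):

```scala
// Some(plugin) if a catalog named "session" can be loaded through lookupCatalog;
// otherwise the error is logged and None is returned.
val maybeSession = DemoLookup.sessionCatalog
```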
UpdateAttributeNullability:

@@ -37,7 +37,7 @@ object UpdateAttributeNullability extends Rule[LogicalPlan] {
    case p if !p.resolved => p
    // Skip leaf node, as it has no child and no need to update nullability.
    case p: LeafNode => p
-    case p: LogicalPlan =>
    case p: LogicalPlan if p.childrenResolved =>
      val nullabilities = p.children.flatMap(c => c.output).groupBy(_.exprId).map {
        // If there are multiple Attributes having the same ExprId, we need to resolve
        // the conflict of nullable field. We do not really expect this to happen.
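The added `childrenResolved` guard keeps the rule from rewriting an operator while any of its children are still unresolved, since attribute nullability is only meaningful once the children's outputs are known. The nullability merge the guard protects can be sketched on its own; `ChildAttr` is a stand-in for Catalyst's `Attribute`, and the "nullable wins" resolution is one natural reading of the comment above, not a quote of the hidden lines:

```scala
// Stand-in for Attribute: an expression id plus a nullability flag.
case class ChildAttr(exprId: Long, nullable: Boolean)

def mergedNullabilities(childOutputs: Seq[ChildAttr]): Map[Long, Boolean] =
  childOutputs
    .groupBy(_.exprId)
    // When several attributes share an ExprId, treat the merged attribute as nullable
    // if any of them is nullable.
    .map { case (id, attrs) => id -> attrs.exists(_.nullable) }

// mergedNullabilities(Seq(ChildAttr(1, nullable = false), ChildAttr(1, nullable = true),
//                         ChildAttr(2, nullable = false))) == Map(1L -> true, 2L -> false)
```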