From 0975feaf0036bc995b33fa2bd20db6e14fc1beaa Mon Sep 17 00:00:00 2001 From: "Rizzo Cascio, Fabio" Date: Tue, 15 Apr 2025 10:35:18 +0100 Subject: [PATCH 01/30] Issue #1186 html tags are not working in hugo --- .../in-dev/unreleased/access-control.md | 42 +++++++++---------- site/hugo.yaml | 6 ++- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/site/content/in-dev/unreleased/access-control.md b/site/content/in-dev/unreleased/access-control.md index 7c4c9cc8e8..f59d071fa7 100644 --- a/site/content/in-dev/unreleased/access-control.md +++ b/site/content/in-dev/unreleased/access-control.md @@ -84,11 +84,11 @@ principal roles. Likewise, a principal role can be granted to one or more catalo The following table displays examples of catalog roles that you might configure in Polaris: -| Example Catalog role | Description | -| -----------------------| ----------- | -| Catalog administrators | A role that has been granted multiple privileges to emulate full access to the catalog.

Principal roles that have been granted this role are permitted to create, alter, read, write, and drop tables in the catalog. | -| Catalog readers | A role that has been granted read-only privileges to tables in the catalog.

Principal roles that have been granted this role are allowed to read from tables in the catalog. | -| Catalog contributor | A role that has been granted read and write access privileges to all tables that belong to the catalog.

Principal roles that have been granted this role are allowed to perform read and write operations on tables in the catalog. | +| Example Catalog role | Description | +| -----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Catalog administrators | A role that has been granted multiple privileges to emulate full access to the catalog.
Principal roles that have been granted this role are permitted to create, alter, read, write, and drop tables in the catalog. | +| Catalog readers | A role that has been granted read-only privileges to tables in the catalog.
Principal roles that have been granted this role are allowed to read from tables in the catalog. | +| Catalog contributor | A role that has been granted read and write access privileges to all tables that belong to the catalog.
Principal roles that have been granted this role are allowed to perform read and write operations on tables in the catalog. | ## RBAC model @@ -106,7 +106,7 @@ perform on objects in Polaris. > **Important** > -> You can only grant privileges at the catalog level. Fine-grained access controls are not available. For example, you can grant read +> You can only grant privileges at the catalog level. Fine-grained access controls are not available. For example, you can grant read > privileges to all tables in a catalog but not to an individual table in the catalog. To grant the full set of privileges (drop, list, read, write, etc.) on an object, you can use the *full privilege* option. @@ -162,32 +162,32 @@ The following diagram illustrates how RBAC works in Polaris and includes the following users: - **Alice:** A service admin who signs up for Polaris. Alice can - create service principals. She can also create catalogs and - namespaces and configure access control for Polaris resources. + create service principals. She can also create catalogs and + namespaces and configure access control for Polaris resources. -- **Bob:** A data engineer who uses Apache Spark™ to - interact with Polaris. +- **Bob:** A data engineer who uses Apache Spark™ to + interact with Polaris. - Alice has created a service principal for Bob. It has been - granted the Data_engineer principal role, which in turn has been - granted the following catalog roles: Catalog contributor and - Data administrator (for both the Silver and Gold zone catalogs - in the following diagram). + granted the Data_engineer principal role, which in turn has been + granted the following catalog roles: Catalog contributor and + Data administrator (for both the Silver and Gold zone catalogs + in the following diagram). - The Catalog contributor role grants permission to create - namespaces and tables in the Bronze zone catalog. + namespaces and tables in the Bronze zone catalog. - The Data administrator roles grant full administrative rights to - the Silver zone catalog and Gold zone catalog. + the Silver zone catalog and Gold zone catalog. - **Mark:** A data scientist who uses trains models with data managed - by Polaris. + by Polaris. - Alice has created a service principal for Mark. It has been - granted the Data_scientist principal role, which in turn has - been granted the catalog role named Catalog reader. + granted the Data_scientist principal role, which in turn has + been granted the catalog role named Catalog reader. - The Catalog reader role grants read-only access for a catalog - named Gold zone catalog. + named Gold zone catalog. 
-![Diagram that shows an example of how RBAC works in Apache Polaris.](/img/rbac-example.svg "Apache Polaris RBAC example") +![Diagram that shows an example of how RBAC works in Apache Polaris.](/img/rbac-example.svg "Apache Polaris RBAC example") \ No newline at end of file diff --git a/site/hugo.yaml b/site/hugo.yaml index 3ee235bdab..10548adcd9 100644 --- a/site/hugo.yaml +++ b/site/hugo.yaml @@ -114,7 +114,7 @@ menu: - name: "Downloads" identifier: "downloads" - url: "/downloads" + url: "/downloads" weight: 200 - name: "Community" @@ -236,6 +236,9 @@ markup: noHl: false style: 'monokai' tabWidth: 4 + goldmark: + renderer: + unsafe: true security: exec: @@ -255,4 +258,3 @@ privacy: simple: true youtube: privacyEnhanced: true - From 545b84b335f60cfb5746733f4af497e87b14beaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JB=20Onofr=C3=A9?= Date: Wed, 16 Apr 2025 20:04:36 +0200 Subject: [PATCH 02/30] Update the link to OpenAPI in the documentation (#1379) --- site/content/in-dev/0.9.0/rest-catalog-open-api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/in-dev/0.9.0/rest-catalog-open-api.md b/site/content/in-dev/0.9.0/rest-catalog-open-api.md index ecb43a83f3..e86c3186b1 100644 --- a/site/content/in-dev/0.9.0/rest-catalog-open-api.md +++ b/site/content/in-dev/0.9.0/rest-catalog-open-api.md @@ -24,4 +24,4 @@ params: show_page_toc: false --- -{{< redoc-polaris "rest-catalog-open-api.yaml" >}} +{{< redoc-polaris "https://raw.githubusercontent.com/apache/polaris/refs/tags/apache-polaris-0.9.0-incubating/spec/rest-catalog-open-api.yaml" >}} From 7425e65e5cd8e988e359027616b9c88ca5b488d1 Mon Sep 17 00:00:00 2001 From: gh-yzou <167037035+gh-yzou@users.noreply.github.com> Date: Thu, 17 Apr 2025 09:55:24 -0700 Subject: [PATCH 03/30] Integration test for Spark Client (#1349) * add integration test * add change * add comments * rebase main * update class comments * add base integration * clean up comments --- build-logic/src/main/kotlin/Utilities.kt | 66 +++++ .../ext/PolarisSparkIntegrationTestBase.java | 235 ++++++++++++++++ .../it/test/PolarisSparkIntegrationTest.java | 213 +------------- plugins/pluginlibs.versions.toml | 1 - .../spark/v3.5/integration/build.gradle.kts | 119 ++++++++ .../spark/quarkus/it/SparkCatalogBaseIT.java | 261 ++++++++++++++++++ .../quarkus/it/SparkCatalogIcebergIT.java | 48 ++++ .../quarkus/it/SparkCatalogPolarisIT.java | 24 ++ .../polaris/spark/quarkus/it/SparkIT.java | 110 ++++++++ .../quarkus/it/SparkIntegrationBase.java | 111 ++++++++ ...olaris.service.it.ext.PolarisServerManager | 20 ++ .../spark/v3.5/{ => spark}/build.gradle.kts | 12 +- .../apache/polaris/spark/PolarisCatalog.java | 0 .../polaris/spark/PolarisRESTCatalog.java | 0 .../polaris/spark/PolarisSparkCatalog.java | 0 .../apache/polaris/spark/SparkCatalog.java | 0 .../rest/CreateGenericTableRESTRequest.java | 0 .../rest/LoadGenericTableRESTResponse.java | 0 .../polaris/spark/utils/DeltaHelper.java | 0 .../spark/utils/PolarisCatalogUtils.java | 0 .../polaris/spark/NoopDeltaCatalog.java | 0 .../polaris/spark/PolarisInMemoryCatalog.java | 0 .../polaris/spark/SparkCatalogTest.java | 0 .../spark/rest/DeserializationTest.java | 0 quarkus/spark-tests/build.gradle.kts | 29 -- settings.gradle.kts | 15 +- 26 files changed, 1009 insertions(+), 255 deletions(-) create mode 100644 build-logic/src/main/kotlin/Utilities.kt create mode 100644 integration-tests/src/main/java/org/apache/polaris/service/it/ext/PolarisSparkIntegrationTestBase.java create mode 100644 
plugins/spark/v3.5/integration/build.gradle.kts create mode 100644 plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java create mode 100644 plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java create mode 100644 plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java create mode 100644 plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java create mode 100644 plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java create mode 100644 plugins/spark/v3.5/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager rename plugins/spark/v3.5/{ => spark}/build.gradle.kts (94%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/PolarisCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/SparkCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java (100%) rename plugins/spark/v3.5/{ => spark}/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java (100%) rename plugins/spark/v3.5/{ => spark}/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java (100%) rename plugins/spark/v3.5/{ => spark}/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java (100%) rename plugins/spark/v3.5/{ => spark}/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java (100%) diff --git a/build-logic/src/main/kotlin/Utilities.kt b/build-logic/src/main/kotlin/Utilities.kt new file mode 100644 index 0000000000..6c235bf398 --- /dev/null +++ b/build-logic/src/main/kotlin/Utilities.kt @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.gradle.api.Project +import org.gradle.process.JavaForkOptions + +/** + * Extract the scala version from polaris spark project, and points the build directory to a sub-dir + * that uses scala version as name. 
The polaris spark project name is in format of + * -_, for example: polaris-spark-3.5_2.12. + */ +fun Project.getAndUseScalaVersionForProject(): String { + val sparkScala = project.name.split("-").last().split("_") + + val scalaVersion = sparkScala[1] + + // direct the build to build/ to avoid potential collision problem + project.layout.buildDirectory.set(layout.buildDirectory.dir(scalaVersion).get()) + + return scalaVersion +} + +/** + * Adds the JPMS options required for Spark to run on Java 17, taken from the + * `DEFAULT_MODULE_OPTIONS` constant in `org.apache.spark.launcher.JavaModuleOptions`. + */ +fun JavaForkOptions.addSparkJvmOptions() { + jvmArgs = + (jvmArgs ?: emptyList()) + + listOf( + // Spark 3.3+ + "-XX:+IgnoreUnrecognizedVMOptions", + "--add-opens=java.base/java.lang=ALL-UNNAMED", + "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", + "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", + "--add-opens=java.base/java.io=ALL-UNNAMED", + "--add-opens=java.base/java.net=ALL-UNNAMED", + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-opens=java.base/java.util=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", + "--add-opens=java.base/sun.security.action=ALL-UNNAMED", + "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", + "--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED", + // Spark 3.4+ + "-Djdk.reflect.useDirectMethodHandle=false", + ) +} diff --git a/integration-tests/src/main/java/org/apache/polaris/service/it/ext/PolarisSparkIntegrationTestBase.java b/integration-tests/src/main/java/org/apache/polaris/service/it/ext/PolarisSparkIntegrationTestBase.java new file mode 100644 index 0000000000..670d39a7da --- /dev/null +++ b/integration-tests/src/main/java/org/apache/polaris/service/it/ext/PolarisSparkIntegrationTestBase.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.service.it.ext; + +import static org.apache.polaris.service.it.env.PolarisClient.polarisClient; + +import com.adobe.testing.s3mock.testcontainers.S3MockContainer; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import org.apache.polaris.core.admin.model.AwsStorageConfigInfo; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogProperties; +import org.apache.polaris.core.admin.model.ExternalCatalog; +import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.StorageConfigInfo; +import org.apache.polaris.service.it.env.CatalogApi; +import org.apache.polaris.service.it.env.ClientCredentials; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.apache.polaris.service.it.env.ManagementApi; +import org.apache.polaris.service.it.env.PolarisApiEndpoints; +import org.apache.polaris.service.it.env.PolarisClient; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.LoggerFactory; + +@ExtendWith(PolarisIntegrationTestExtension.class) +public abstract class PolarisSparkIntegrationTestBase { + protected static final S3MockContainer s3Container = + new S3MockContainer("3.11.0").withInitialBuckets("my-bucket,my-old-bucket"); + protected static SparkSession spark; + protected PolarisApiEndpoints endpoints; + protected PolarisClient client; + protected ManagementApi managementApi; + protected CatalogApi catalogApi; + protected String sparkToken; + protected String catalogName; + protected String externalCatalogName; + + protected URI warehouseDir; + + @BeforeAll + public static void setup() throws IOException { + s3Container.start(); + } + + @AfterAll + public static void cleanup() { + s3Container.stop(); + } + + @BeforeEach + public void before( + PolarisApiEndpoints apiEndpoints, ClientCredentials credentials, @TempDir Path tempDir) { + endpoints = apiEndpoints; + client = polarisClient(endpoints); + sparkToken = client.obtainToken(credentials); + managementApi = client.managementApi(credentials); + catalogApi = client.catalogApi(credentials); + + warehouseDir = IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve("spark-warehouse"); + + catalogName = client.newEntityName("spark_catalog"); + externalCatalogName = client.newEntityName("spark_ext_catalog"); + + AwsStorageConfigInfo awsConfigModel = + AwsStorageConfigInfo.builder() + .setRoleArn("arn:aws:iam::123456789012:role/my-role") + .setExternalId("externalId") + .setUserArn("userArn") + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3) + .setAllowedLocations(List.of("s3://my-old-bucket/path/to/data")) + .build(); + CatalogProperties props = new CatalogProperties("s3://my-bucket/path/to/data"); + props.putAll( + Map.of( + "table-default.s3.endpoint", + s3Container.getHttpEndpoint(), + "table-default.s3.path-style-access", + "true", + "table-default.s3.access-key-id", + "foo", + "table-default.s3.secret-access-key", + "bar", + "s3.endpoint", + s3Container.getHttpEndpoint(), + "s3.path-style-access", + "true", + "s3.access-key-id", + 
"foo", + "s3.secret-access-key", + "bar")); + Catalog catalog = + PolarisCatalog.builder() + .setType(Catalog.TypeEnum.INTERNAL) + .setName(catalogName) + .setProperties(props) + .setStorageConfigInfo(awsConfigModel) + .build(); + + managementApi.createCatalog(catalog); + + CatalogProperties externalProps = new CatalogProperties("s3://my-bucket/path/to/data"); + externalProps.putAll( + Map.of( + "table-default.s3.endpoint", + s3Container.getHttpEndpoint(), + "table-default.s3.path-style-access", + "true", + "table-default.s3.access-key-id", + "foo", + "table-default.s3.secret-access-key", + "bar", + "s3.endpoint", + s3Container.getHttpEndpoint(), + "s3.path-style-access", + "true", + "s3.access-key-id", + "foo", + "s3.secret-access-key", + "bar")); + Catalog externalCatalog = + ExternalCatalog.builder() + .setType(Catalog.TypeEnum.EXTERNAL) + .setName(externalCatalogName) + .setProperties(externalProps) + .setStorageConfigInfo(awsConfigModel) + .build(); + + managementApi.createCatalog(externalCatalog); + + SparkSession.Builder sessionBuilder = + SparkSession.builder() + .master("local[1]") + .config("spark.hadoop.fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") + .config( + "spark.hadoop.fs.s3.aws.credentials.provider", + "org.apache.hadoop.fs.s3.TemporaryAWSCredentialsProvider") + .config("spark.hadoop.fs.s3.access.key", "foo") + .config("spark.hadoop.fs.s3.secret.key", "bar") + .config( + "spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") + .config("spark.ui.showConsoleProgress", false) + .config("spark.ui.enabled", "false"); + spark = + withCatalog(withCatalog(sessionBuilder, catalogName), externalCatalogName).getOrCreate(); + + onSpark("USE " + catalogName); + } + + protected SparkSession.Builder withCatalog(SparkSession.Builder builder, String catalogName) { + return builder + .config( + String.format("spark.sql.catalog.%s", catalogName), + "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.warehouse.dir", warehouseDir.toString()) + .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest") + .config( + String.format("spark.sql.catalog.%s.uri", catalogName), + endpoints.catalogApiEndpoint().toString()) + .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName) + .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL") + .config( + String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId()) + .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken) + .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey") + .config( + String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret") + .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2"); + } + + @AfterEach + public void after() throws Exception { + cleanupCatalog(catalogName); + cleanupCatalog(externalCatalogName); + try { + SparkSession.clearDefaultSession(); + SparkSession.clearActiveSession(); + spark.close(); + } catch (Exception e) { + LoggerFactory.getLogger(getClass()).error("Unable to close spark session", e); + } + + client.close(); + } + + protected void cleanupCatalog(String catalogName) { + onSpark("USE " + catalogName); + List namespaces = onSpark("SHOW NAMESPACES").collectAsList(); + for (Row namespace : namespaces) { + List tables = onSpark("SHOW TABLES IN " + namespace.getString(0)).collectAsList(); + for (Row table : tables) { + onSpark("DROP TABLE " + 
namespace.getString(0) + "." + table.getString(1)); + } + List views = onSpark("SHOW VIEWS IN " + namespace.getString(0)).collectAsList(); + for (Row view : views) { + onSpark("DROP VIEW " + namespace.getString(0) + "." + view.getString(1)); + } + onSpark("DROP NAMESPACE " + namespace.getString(0)); + } + + managementApi.deleteCatalog(catalogName); + } + + protected static Dataset onSpark(@Language("SQL") String sql) { + return spark.sql(sql); + } +} diff --git a/integration-tests/src/main/java/org/apache/polaris/service/it/test/PolarisSparkIntegrationTest.java b/integration-tests/src/main/java/org/apache/polaris/service/it/test/PolarisSparkIntegrationTest.java index 10868c1ea9..2bdd109281 100644 --- a/integration-tests/src/main/java/org/apache/polaris/service/it/test/PolarisSparkIntegrationTest.java +++ b/integration-tests/src/main/java/org/apache/polaris/service/it/test/PolarisSparkIntegrationTest.java @@ -18,47 +18,20 @@ */ package org.apache.polaris.service.it.test; -import static org.apache.polaris.service.it.env.PolarisClient.polarisClient; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import com.adobe.testing.s3mock.testcontainers.S3MockContainer; import com.google.common.collect.ImmutableMap; import jakarta.ws.rs.client.Entity; import jakarta.ws.rs.core.Response; -import java.io.IOException; -import java.net.URI; -import java.nio.file.Path; import java.time.Instant; import java.util.List; import java.util.Map; import org.apache.iceberg.rest.requests.ImmutableRegisterTableRequest; import org.apache.iceberg.rest.responses.LoadTableResponse; -import org.apache.polaris.core.admin.model.AwsStorageConfigInfo; -import org.apache.polaris.core.admin.model.Catalog; -import org.apache.polaris.core.admin.model.CatalogProperties; -import org.apache.polaris.core.admin.model.ExternalCatalog; -import org.apache.polaris.core.admin.model.PolarisCatalog; -import org.apache.polaris.core.admin.model.StorageConfigInfo; -import org.apache.polaris.service.it.env.CatalogApi; -import org.apache.polaris.service.it.env.ClientCredentials; -import org.apache.polaris.service.it.env.IntegrationTestsHelper; -import org.apache.polaris.service.it.env.ManagementApi; -import org.apache.polaris.service.it.env.PolarisApiEndpoints; -import org.apache.polaris.service.it.env.PolarisClient; -import org.apache.polaris.service.it.ext.PolarisIntegrationTestExtension; -import org.apache.spark.sql.Dataset; +import org.apache.polaris.service.it.ext.PolarisSparkIntegrationTestBase; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -import org.intellij.lang.annotations.Language; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.io.TempDir; -import org.slf4j.LoggerFactory; /** * @implSpec This test expects the server to be configured with the following features enabled: @@ -71,185 +44,7 @@ * {@code true} * */ -@ExtendWith(PolarisIntegrationTestExtension.class) -public class PolarisSparkIntegrationTest { - - private static final S3MockContainer s3Container = - new S3MockContainer("3.11.0").withInitialBuckets("my-bucket,my-old-bucket"); - private static SparkSession spark; - private PolarisApiEndpoints endpoints; - private PolarisClient client; - private ManagementApi managementApi; - private CatalogApi 
catalogApi; - private String sparkToken; - private String catalogName; - private String externalCatalogName; - - private URI warehouseDir; - - @BeforeAll - public static void setup() throws IOException { - s3Container.start(); - } - - @AfterAll - public static void cleanup() { - s3Container.stop(); - } - - @BeforeEach - public void before( - PolarisApiEndpoints apiEndpoints, ClientCredentials credentials, @TempDir Path tempDir) { - endpoints = apiEndpoints; - client = polarisClient(endpoints); - sparkToken = client.obtainToken(credentials); - managementApi = client.managementApi(credentials); - catalogApi = client.catalogApi(credentials); - - warehouseDir = IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve("spark-warehouse"); - - catalogName = client.newEntityName("spark_catalog"); - externalCatalogName = client.newEntityName("spark_ext_catalog"); - - AwsStorageConfigInfo awsConfigModel = - AwsStorageConfigInfo.builder() - .setRoleArn("arn:aws:iam::123456789012:role/my-role") - .setExternalId("externalId") - .setUserArn("userArn") - .setStorageType(StorageConfigInfo.StorageTypeEnum.S3) - .setAllowedLocations(List.of("s3://my-old-bucket/path/to/data")) - .build(); - CatalogProperties props = new CatalogProperties("s3://my-bucket/path/to/data"); - props.putAll( - Map.of( - "table-default.s3.endpoint", - s3Container.getHttpEndpoint(), - "table-default.s3.path-style-access", - "true", - "table-default.s3.access-key-id", - "foo", - "table-default.s3.secret-access-key", - "bar", - "s3.endpoint", - s3Container.getHttpEndpoint(), - "s3.path-style-access", - "true", - "s3.access-key-id", - "foo", - "s3.secret-access-key", - "bar")); - Catalog catalog = - PolarisCatalog.builder() - .setType(Catalog.TypeEnum.INTERNAL) - .setName(catalogName) - .setProperties(props) - .setStorageConfigInfo(awsConfigModel) - .build(); - - managementApi.createCatalog(catalog); - - CatalogProperties externalProps = new CatalogProperties("s3://my-bucket/path/to/data"); - externalProps.putAll( - Map.of( - "table-default.s3.endpoint", - s3Container.getHttpEndpoint(), - "table-default.s3.path-style-access", - "true", - "table-default.s3.access-key-id", - "foo", - "table-default.s3.secret-access-key", - "bar", - "s3.endpoint", - s3Container.getHttpEndpoint(), - "s3.path-style-access", - "true", - "s3.access-key-id", - "foo", - "s3.secret-access-key", - "bar")); - Catalog externalCatalog = - ExternalCatalog.builder() - .setType(Catalog.TypeEnum.EXTERNAL) - .setName(externalCatalogName) - .setProperties(externalProps) - .setStorageConfigInfo(awsConfigModel) - .build(); - - managementApi.createCatalog(externalCatalog); - - SparkSession.Builder sessionBuilder = - SparkSession.builder() - .master("local[1]") - .config("spark.hadoop.fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") - .config( - "spark.hadoop.fs.s3.aws.credentials.provider", - "org.apache.hadoop.fs.s3.TemporaryAWSCredentialsProvider") - .config("spark.hadoop.fs.s3.access.key", "foo") - .config("spark.hadoop.fs.s3.secret.key", "bar") - .config( - "spark.sql.extensions", - "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") - .config("spark.ui.showConsoleProgress", false) - .config("spark.ui.enabled", "false"); - spark = - withCatalog(withCatalog(sessionBuilder, catalogName), externalCatalogName).getOrCreate(); - - onSpark("USE " + catalogName); - } - - private SparkSession.Builder withCatalog(SparkSession.Builder builder, String catalogName) { - return builder - .config( - String.format("spark.sql.catalog.%s", catalogName), - 
"org.apache.iceberg.spark.SparkCatalog") - .config("spark.sql.warehouse.dir", warehouseDir.toString()) - .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest") - .config( - String.format("spark.sql.catalog.%s.uri", catalogName), - endpoints.catalogApiEndpoint().toString()) - .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName) - .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL") - .config( - String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId()) - .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken) - .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey") - .config( - String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret") - .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2"); - } - - @AfterEach - public void after() throws Exception { - cleanupCatalog(catalogName); - cleanupCatalog(externalCatalogName); - try { - SparkSession.clearDefaultSession(); - SparkSession.clearActiveSession(); - spark.close(); - } catch (Exception e) { - LoggerFactory.getLogger(getClass()).error("Unable to close spark session", e); - } - - client.close(); - } - - private void cleanupCatalog(String catalogName) { - onSpark("USE " + catalogName); - List namespaces = onSpark("SHOW NAMESPACES").collectAsList(); - for (Row namespace : namespaces) { - List tables = onSpark("SHOW TABLES IN " + namespace.getString(0)).collectAsList(); - for (Row table : tables) { - onSpark("DROP TABLE " + namespace.getString(0) + "." + table.getString(1)); - } - List views = onSpark("SHOW VIEWS IN " + namespace.getString(0)).collectAsList(); - for (Row view : views) { - onSpark("DROP VIEW " + namespace.getString(0) + "." + view.getString(1)); - } - onSpark("DROP NAMESPACE " + namespace.getString(0)); - } - - managementApi.deleteCatalog(catalogName); - } +public class PolarisSparkIntegrationTest extends PolarisSparkIntegrationTestBase { @Test public void testCreateTable() { @@ -363,8 +158,4 @@ private LoadTableResponse loadTable(String catalog, String namespace, String tab return response.readEntity(LoadTableResponse.class); } } - - private static Dataset onSpark(@Language("SQL") String sql) { - return spark.sql(sql); - } } diff --git a/plugins/pluginlibs.versions.toml b/plugins/pluginlibs.versions.toml index e48f6ef45a..4f7288ff64 100644 --- a/plugins/pluginlibs.versions.toml +++ b/plugins/pluginlibs.versions.toml @@ -22,4 +22,3 @@ iceberg = "1.8.1" spark35 = "3.5.5" scala212 = "2.12.19" scala213 = "2.13.15" - diff --git a/plugins/spark/v3.5/integration/build.gradle.kts b/plugins/spark/v3.5/integration/build.gradle.kts new file mode 100644 index 0000000000..e4611feb37 --- /dev/null +++ b/plugins/spark/v3.5/integration/build.gradle.kts @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +plugins { + alias(libs.plugins.quarkus) + alias(libs.plugins.jandex) + id("polaris-quarkus") +} + +// get version information +val sparkMajorVersion = "3.5" +val scalaVersion = getAndUseScalaVersionForProject() +val icebergVersion = pluginlibs.versions.iceberg.get() +val spark35Version = pluginlibs.versions.spark35.get() +val scalaLibraryVersion = + if (scalaVersion == "2.12") { + pluginlibs.versions.scala212.get() + } else { + pluginlibs.versions.scala213.get() + } + +dependencies { + // must be enforced to get a consistent and validated set of dependencies + implementation(enforcedPlatform(libs.quarkus.bom)) { + exclude(group = "org.antlr", module = "antlr4-runtime") + exclude(group = "org.scala-lang", module = "scala-library") + exclude(group = "org.scala-lang", module = "scala-reflect") + } + + implementation(project(":polaris-quarkus-service")) + implementation(project(":polaris-api-management-model")) + implementation(project(":polaris-spark-${sparkMajorVersion}_${scalaVersion}")) + + implementation("org.apache.spark:spark-sql_${scalaVersion}:${spark35Version}") { + // exclude log4j dependencies + exclude("org.apache.logging.log4j", "log4j-slf4j2-impl") + exclude("org.apache.logging.log4j", "log4j-api") + exclude("org.apache.logging.log4j", "log4j-1.2-api") + exclude("org.slf4j", "jul-to-slf4j") + } + + implementation(platform(libs.jackson.bom)) + implementation("com.fasterxml.jackson.core:jackson-annotations") + implementation("com.fasterxml.jackson.core:jackson-core") + implementation("com.fasterxml.jackson.core:jackson-databind") + + implementation( + "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}" + ) + + testImplementation(testFixtures(project(":polaris-quarkus-service"))) + + testImplementation(platform(libs.quarkus.bom)) + testImplementation("io.quarkus:quarkus-junit5") + testImplementation("io.quarkus:quarkus-rest-client") + testImplementation("io.quarkus:quarkus-rest-client-jackson") + + testImplementation(platform(libs.awssdk.bom)) + testImplementation("software.amazon.awssdk:glue") + testImplementation("software.amazon.awssdk:kms") + testImplementation("software.amazon.awssdk:dynamodb") + + testImplementation(platform(libs.testcontainers.bom)) + testImplementation("org.testcontainers:testcontainers") + testImplementation(libs.s3mock.testcontainers) + + // Required for Spark integration tests + testImplementation(enforcedPlatform("org.scala-lang:scala-library:${scalaLibraryVersion}")) + testImplementation(enforcedPlatform("org.scala-lang:scala-reflect:${scalaLibraryVersion}")) + testImplementation(libs.javax.servlet.api) + testImplementation(libs.antlr4.runtime) +} + +tasks.named("intTest").configure { + maxParallelForks = 1 + systemProperty("java.util.logging.manager", "org.jboss.logmanager.LogManager") + if (System.getenv("AWS_REGION") == null) { + environment("AWS_REGION", "us-west-2") + } + // Note: the test secrets are referenced in + // org.apache.polaris.service.quarkus.it.QuarkusServerManager + environment("POLARIS_BOOTSTRAP_CREDENTIALS", "POLARIS,test-admin,test-secret") + 
jvmArgs("--add-exports", "java.base/sun.nio.ch=ALL-UNNAMED") + // Need to allow a java security manager after Java 21, for Subject.getSubject to work + // "getSubject is supported only if a security manager is allowed". + systemProperty("java.security.manager", "allow") + // Same issue as above: allow a java security manager after Java 21 + // (this setting is for the application under test, while the setting above is for test code). + systemProperty("quarkus.test.arg-line", "-Djava.security.manager=allow") + val logsDir = project.layout.buildDirectory.get().asFile.resolve("logs") + // delete files from previous runs + doFirst { + // delete log files written by Polaris + logsDir.deleteRecursively() + // delete quarkus.log file (captured Polaris stdout/stderr) + project.layout.buildDirectory.get().asFile.resolve("quarkus.log").delete() + } + // This property is not honored in a per-profile application.properties file, + // so we need to set it here. + systemProperty("quarkus.log.file.path", logsDir.resolve("polaris.log").absolutePath) + // For Spark integration tests + addSparkJvmOptions() +} diff --git a/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java new file mode 100644 index 0000000000..575436e33b --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.util.Arrays; +import java.util.Map; +import org.apache.iceberg.exceptions.BadRequestException; +import org.apache.iceberg.spark.SupportsReplaceView; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.connector.catalog.*; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * This integration directly performs operations using the SparkCatalog instance, instead of going + * through Spark SQL interface. This provides a more direct testing capability against the Polaris + * SparkCatalog operations, some operations like listNamespaces under a namespace can not be + * triggered through a SQL interface directly with Spark. 
+ */ +@QuarkusIntegrationTest +public abstract class SparkCatalogBaseIT extends SparkIntegrationBase { + private static StructType schema = new StructType().add("id", "long").add("name", "string"); + protected StagingTableCatalog tableCatalog = null; + protected SupportsNamespaces namespaceCatalog = null; + protected ViewCatalog viewCatalog = null; + protected SupportsReplaceView replaceViewCatalog = null; + + @BeforeEach + protected void loadCatalogs() { + Preconditions.checkArgument(spark != null, "No active spark found"); + Preconditions.checkArgument(catalogName != null, "No catalogName found"); + CatalogPlugin catalogPlugin = spark.sessionState().catalogManager().catalog(catalogName); + tableCatalog = (StagingTableCatalog) catalogPlugin; + namespaceCatalog = (SupportsNamespaces) catalogPlugin; + viewCatalog = (ViewCatalog) catalogPlugin; + replaceViewCatalog = (SupportsReplaceView) catalogPlugin; + } + + @Test + void testNamespaceOperations() throws Exception { + String[][] lv1ns = new String[][] {{"l1ns1"}, {"l1ns2"}}; + String[][] lv2ns1 = new String[][] {{"l1ns1", "l2ns1"}, {"l1ns1", "l2ns2"}}; + String[][] lv2ns2 = new String[][] {{"l1ns2", "l2ns3"}}; + + // create the namespaces + for (String[] namespace : lv1ns) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns1) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns2) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + + // list namespaces under root + String[][] lv1nsResult = namespaceCatalog.listNamespaces(); + assertThat(lv1nsResult.length).isEqualTo(lv1ns.length); + for (String[] namespace : lv1ns) { + assertThat(Arrays.asList(lv1nsResult)).contains(namespace); + } + // list namespace under l1ns1 + String[][] lv2ns1Result = namespaceCatalog.listNamespaces(lv1ns[0]); + assertThat(lv2ns1Result.length).isEqualTo(lv2ns1.length); + for (String[] namespace : lv2ns1) { + assertThat(Arrays.asList(lv2ns1Result)).contains(namespace); + } + // list namespace under l1ns2 + String[][] lv2ns2Result = namespaceCatalog.listNamespaces(lv1ns[1]); + assertThat(lv2ns2Result.length).isEqualTo(lv2ns2.length); + for (String[] namespace : lv2ns2) { + assertThat(Arrays.asList(lv2ns2Result)).contains(namespace); + } + // no namespace under l1ns2.l2ns3 + assertThat(namespaceCatalog.listNamespaces(lv2ns2[0]).length).isEqualTo(0); + + // drop the nested namespace under lv1ns[1] + namespaceCatalog.dropNamespace(lv2ns2[0], true); + assertThat(namespaceCatalog.listNamespaces(lv1ns[1]).length).isEqualTo(0); + namespaceCatalog.dropNamespace(lv1ns[1], true); + assertThatThrownBy(() -> namespaceCatalog.listNamespaces(lv1ns[1])) + .isInstanceOf(NoSuchNamespaceException.class); + + // directly drop lv1ns[0] should fail + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(lv1ns[0], true)) + .isInstanceOf(BadRequestException.class); + for (String[] namespace : lv2ns1) { + namespaceCatalog.dropNamespace(namespace, true); + } + namespaceCatalog.dropNamespace(lv1ns[0], true); + + // no more namespace available + assertThat(namespaceCatalog.listNamespaces().length).isEqualTo(0); + } + + @Test + void testAlterNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("owner", "user1"); + + namespaceCatalog.createNamespace(namespace, metadata); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "user1")); + + 
namespaceCatalog.alterNamespace(namespace, NamespaceChange.setProperty("owner", "new-user")); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "new-user")); + + // drop the namespace + namespaceCatalog.dropNamespace(namespace, true); + } + + @Test + void testBasicViewOperations() throws Exception { + String[] namespace = new String[] {"ns"}; + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + + Identifier viewIdentifier = Identifier.of(namespace, "test-view"); + String viewSql = "select id from test-table where id < 3"; + viewCatalog.createView( + viewIdentifier, + viewSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + + // load the view + View view = viewCatalog.loadView(viewIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // alter the view properties + viewCatalog.alterView(viewIdentifier, ViewChange.setProperty("owner", "user1")); + view = viewCatalog.loadView(viewIdentifier); + assertThat(view.properties()).contains(Map.entry("owner", "user1")); + + // rename the view + Identifier newIdentifier = Identifier.of(namespace, "new-view"); + viewCatalog.renameView(viewIdentifier, newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(viewIdentifier)) + .isInstanceOf(NoSuchViewException.class); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // replace the view + String newSql = "select id from test-table where id == 3"; + Map properties = Maps.newHashMap(); + properties.put("owner", "test-user"); + replaceViewCatalog.replaceView( + newIdentifier, + newSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + properties); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(newSql); + assertThat(view.properties()).contains(Map.entry("owner", "test-user")); + + // drop the view + viewCatalog.dropView(newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(newIdentifier)) + .isInstanceOf(NoSuchViewException.class); + } + + @Test + void testListViews() throws Exception { + String[] l1ns = new String[] {"ns"}; + namespaceCatalog.createNamespace(l1ns, Maps.newHashMap()); + + // create a new namespace under the default NS + String[] l2ns = new String[] {"ns", "nsl2"}; + namespaceCatalog.createNamespace(l2ns, Maps.newHashMap()); + // create one view under l1 + String view1Name = "test-view1"; + String view1SQL = "select id from test-table where id >= 3"; + viewCatalog.createView( + Identifier.of(l1ns, view1Name), + view1SQL, + catalogName, + l1ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + // create two views under the l2 namespace + String[] nsl2ViewNames = new String[] {"test-view2", "test-view3"}; + String[] nsl2ViewSQLs = + new String[] { + "select id from test-table where id == 3", "select id from test-table where id < 3" + }; + for (int i = 0; i < nsl2ViewNames.length; i++) { + viewCatalog.createView( + Identifier.of(l2ns, nsl2ViewNames[i]), + nsl2ViewSQLs[i], + catalogName, + l2ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + } + // list views under l1ns + Identifier[] l1Views = viewCatalog.listViews(l1ns); + assertThat(l1Views.length).isEqualTo(1); + assertThat(l1Views[0].name()).isEqualTo(view1Name); + + // list views under l2ns + Identifier[] 
l2Views = viewCatalog.listViews(l2ns); + assertThat(l2Views.length).isEqualTo(nsl2ViewSQLs.length); + for (String name : nsl2ViewNames) { + assertThat(Arrays.asList(l2Views)).contains(Identifier.of(l2ns, name)); + } + + // drop namespace fails since there are views under it + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(l2ns, true)) + .isInstanceOf(BadRequestException.class); + // drop the views + for (String name : nsl2ViewNames) { + viewCatalog.dropView(Identifier.of(l2ns, name)); + } + namespaceCatalog.dropNamespace(l2ns, true); + viewCatalog.dropView(Identifier.of(l1ns, view1Name)); + namespaceCatalog.dropNamespace(l1ns, true); + } +} diff --git a/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java new file mode 100644 index 0000000000..f3c411df23 --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import org.apache.spark.sql.SparkSession; + +@QuarkusIntegrationTest +public class SparkCatalogIcebergIT extends SparkCatalogBaseIT { + /** Initialize the spark catalog to use the iceberg spark catalog. 
*/ + @Override + protected SparkSession.Builder withCatalog(SparkSession.Builder builder, String catalogName) { + return builder + .config( + String.format("spark.sql.catalog.%s", catalogName), + "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.warehouse.dir", warehouseDir.toString()) + .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest") + .config( + String.format("spark.sql.catalog.%s.uri", catalogName), + endpoints.catalogApiEndpoint().toString()) + .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName) + .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL") + .config( + String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId()) + .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken) + .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey") + .config( + String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret") + .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2"); + } +} diff --git a/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java new file mode 100644 index 0000000000..97a4c222db --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; + +@QuarkusIntegrationTest +public class SparkCatalogPolarisIT extends SparkCatalogBaseIT {} diff --git a/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java new file mode 100644 index 0000000000..f9af2609d0 --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.util.List; +import org.junit.jupiter.api.Test; + +@QuarkusIntegrationTest +public class SparkIT extends SparkIntegrationBase { + @Test + public void testNamespaces() { + List namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + + String[] l1NS = new String[] {"l1ns1", "l1ns2"}; + for (String ns : l1NS) { + sql("CREATE NAMESPACE %s", ns); + } + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + for (String ns : l1NS) { + assertThat(namespaces).contains(new Object[] {ns}); + } + String l2ns = "l2ns"; + // create a nested namespace + sql("CREATE NAMESPACE %s.%s", l1NS[0], l2ns); + // spark show namespace only shows + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + + // can not drop l1NS before the nested namespace is dropped + assertThatThrownBy(() -> sql("DROP NAMESPACE %s", l1NS[0])) + .hasMessageContaining(String.format("Namespace %s is not empty", l1NS[0])); + sql("DROP NAMESPACE %s.%s", l1NS[0], l2ns); + + for (String ns : l1NS) { + sql("DROP NAMESPACE %s", ns); + } + + // no namespace available after all drop + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + } + + @Test + public void testCreatDropView() { + String namespace = "ns"; + // create namespace ns + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create two views under the namespace + String view1Name = "testView1"; + String view2Name = "testView2"; + sql("CREATE VIEW %s AS SELECT 1 AS id", view1Name); + sql("CREATE VIEW %s AS SELECT 10 AS id", view2Name); + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(2); + assertThat(views).contains(new Object[] {namespace, view1Name, false}); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + // drop the views + sql("DROP VIEW %s", view1Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + sql("DROP VIEW %s", view2Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(0); + } + + @Test + public void renameView() { + sql("CREATE NAMESPACE ns"); + sql("USE ns"); + + String viewName = "originalView"; + String renamedView = "renamedView"; + sql("CREATE VIEW %s AS SELECT 1 AS id", viewName); + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {"ns", viewName, false}); + + sql("ALTER VIEW %s RENAME TO %s", viewName, renamedView); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {"ns", renamedView, false}); + } +} diff --git a/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java 
b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java new file mode 100644 index 0000000000..b5006d6a79 --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import com.google.common.collect.ImmutableList; +import com.google.errorprone.annotations.FormatMethod; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.polaris.service.it.ext.PolarisSparkIntegrationTestBase; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; + +public abstract class SparkIntegrationBase extends PolarisSparkIntegrationTestBase { + + @Override + protected SparkSession.Builder withCatalog(SparkSession.Builder builder, String catalogName) { + return builder + .config( + String.format("spark.sql.catalog.%s", catalogName), + "org.apache.polaris.spark.SparkCatalog") + .config( + "spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") + .config("spark.sql.warehouse.dir", warehouseDir.toString()) + .config(String.format("spark.sql.catalog.%s.type", catalogName), "rest") + .config( + String.format("spark.sql.catalog.%s.uri", catalogName), + endpoints.catalogApiEndpoint().toString()) + .config(String.format("spark.sql.catalog.%s.warehouse", catalogName), catalogName) + .config(String.format("spark.sql.catalog.%s.scope", catalogName), "PRINCIPAL_ROLE:ALL") + .config( + String.format("spark.sql.catalog.%s.header.realm", catalogName), endpoints.realmId()) + .config(String.format("spark.sql.catalog.%s.token", catalogName), sparkToken) + .config(String.format("spark.sql.catalog.%s.s3.access-key-id", catalogName), "fakekey") + .config( + String.format("spark.sql.catalog.%s.s3.secret-access-key", catalogName), "fakesecret") + .config(String.format("spark.sql.catalog.%s.s3.region", catalogName), "us-west-2"); + } + + @Override + protected void cleanupCatalog(String catalogName) { + onSpark("USE " + catalogName); + List namespaces = onSpark("SHOW NAMESPACES").collectAsList(); + for (Row namespace : namespaces) { + // TODO: once all table operations are supported, remove the override of this function + // List tables = onSpark("SHOW TABLES IN " + namespace.getString(0)).collectAsList(); + // for (Row table : tables) { + // onSpark("DROP TABLE " + namespace.getString(0) + "." + table.getString(1)); + // } + List views = onSpark("SHOW VIEWS IN " + namespace.getString(0)).collectAsList(); + for (Row view : views) { + onSpark("DROP VIEW " + namespace.getString(0) + "." 
+ view.getString(1)); + } + onSpark("DROP NAMESPACE " + namespace.getString(0)); + } + + managementApi.deleteCatalog(catalogName); + } + + @FormatMethod + protected List sql(String query, Object... args) { + List rows = spark.sql(String.format(query, args)).collectAsList(); + if (rows.isEmpty()) { + return ImmutableList.of(); + } + return rowsToJava(rows); + } + + protected List rowsToJava(List rows) { + return rows.stream().map(this::toJava).collect(Collectors.toList()); + } + + private Object[] toJava(Row row) { + return IntStream.range(0, row.size()) + .mapToObj( + pos -> { + if (row.isNullAt(pos)) { + return null; + } + + Object value = row.get(pos); + if (value instanceof Row) { + return toJava((Row) value); + } else if (value instanceof scala.collection.Seq) { + return row.getList(pos); + } else if (value instanceof scala.collection.Map) { + return row.getJavaMap(pos); + } else { + return value; + } + }) + .toArray(Object[]::new); + } +} diff --git a/plugins/spark/v3.5/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager b/plugins/spark/v3.5/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager new file mode 100644 index 0000000000..3c3881857b --- /dev/null +++ b/plugins/spark/v3.5/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
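The `withCatalog` override in `SparkIntegrationBase` above lists every Spark setting the integration tests rely on to reach Polaris. As a rough standalone sketch of the same wiring outside the test harness — the endpoint, token, and catalog name below are placeholder values rather than anything defined by this patch, and the Polaris Spark client plus Iceberg Spark runtime jars are assumed to be on the classpath — a local `SparkSession` might be configured like this:

```java
import org.apache.spark.sql.SparkSession;

public class PolarisSparkSessionExample {
  public static void main(String[] args) {
    // Placeholder values -- substitute the endpoint, token, and catalog of a running Polaris server.
    String catalogName = "polaris";
    String polarisUri = "http://localhost:8181/api/catalog";
    String token = "<oauth-token>";

    SparkSession spark =
        SparkSession.builder()
            .master("local[1]")
            .config(
                "spark.sql.extensions",
                "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
            .config("spark.sql.catalog." + catalogName, "org.apache.polaris.spark.SparkCatalog")
            .config("spark.sql.catalog." + catalogName + ".type", "rest")
            .config("spark.sql.catalog." + catalogName + ".uri", polarisUri)
            .config("spark.sql.catalog." + catalogName + ".warehouse", catalogName)
            .config("spark.sql.catalog." + catalogName + ".scope", "PRINCIPAL_ROLE:ALL")
            .config("spark.sql.catalog." + catalogName + ".token", token)
            .getOrCreate();

    // Basic smoke test: switch to the Polaris-backed catalog and list namespaces.
    spark.sql("USE " + catalogName);
    spark.sql("SHOW NAMESPACES").show();
    spark.stop();
  }
}
```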
+# + +org.apache.polaris.service.quarkus.it.QuarkusServerManager diff --git a/plugins/spark/v3.5/build.gradle.kts b/plugins/spark/v3.5/spark/build.gradle.kts similarity index 94% rename from plugins/spark/v3.5/build.gradle.kts rename to plugins/spark/v3.5/spark/build.gradle.kts index df37fa229e..ddf27ce1f9 100644 --- a/plugins/spark/v3.5/build.gradle.kts +++ b/plugins/spark/v3.5/spark/build.gradle.kts @@ -24,17 +24,6 @@ plugins { alias(libs.plugins.jandex) } -fun getAndUseScalaVersionForProject(): String { - val sparkScala = project.name.split("-").last().split("_") - - val scalaVersion = sparkScala[1] - - // direct the build to build/ to avoid potential collision problem - project.layout.buildDirectory.set(layout.buildDirectory.dir(scalaVersion).get()) - - return scalaVersion -} - // get version information val sparkMajorVersion = "3.5" val scalaVersion = getAndUseScalaVersionForProject() @@ -141,6 +130,7 @@ tasks.register("createPolarisSparkJar") { mergeServiceFiles() // pack both the source code and dependencies + from(sourceSets.main.get().output) configurations = listOf(project.configurations.runtimeClasspath.get()) diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisCatalog.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisCatalog.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/SparkCatalog.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/SparkCatalog.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java rename to 
plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java diff --git a/plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java b/plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java similarity index 100% rename from plugins/spark/v3.5/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java rename to plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java diff --git a/plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java b/plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java rename to plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java diff --git a/plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java b/plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java similarity index 100% rename from plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java rename to plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java diff --git a/plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java b/plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java similarity index 100% rename from plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java rename to plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java diff --git a/plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java b/plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java similarity index 100% rename from plugins/spark/v3.5/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java rename to plugins/spark/v3.5/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java diff --git a/quarkus/spark-tests/build.gradle.kts b/quarkus/spark-tests/build.gradle.kts index 18cc9d585f..0e83a7f279 100644 --- a/quarkus/spark-tests/build.gradle.kts +++ b/quarkus/spark-tests/build.gradle.kts @@ -88,32 +88,3 @@ tasks.named("intTest").configure { // For Spark integration tests addSparkJvmOptions() } - -/** - * Adds the JPMS options required for Spark to run on Java 17, taken from the - * `DEFAULT_MODULE_OPTIONS` constant in `org.apache.spark.launcher.JavaModuleOptions`. 
- */ -fun JavaForkOptions.addSparkJvmOptions() { - jvmArgs = - (jvmArgs ?: emptyList()) + - listOf( - // Spark 3.3+ - "-XX:+IgnoreUnrecognizedVMOptions", - "--add-opens=java.base/java.lang=ALL-UNNAMED", - "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", - "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", - "--add-opens=java.base/java.io=ALL-UNNAMED", - "--add-opens=java.base/java.net=ALL-UNNAMED", - "--add-opens=java.base/java.nio=ALL-UNNAMED", - "--add-opens=java.base/java.util=ALL-UNNAMED", - "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", - "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", - "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", - "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", - "--add-opens=java.base/sun.security.action=ALL-UNNAMED", - "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED", - "--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED", - // Spark 3.4+ - "-Djdk.reflect.useDirectMethodHandle=false", - ) -} diff --git a/settings.gradle.kts b/settings.gradle.kts index 3d658130d8..cbfbc3a269 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -71,12 +71,21 @@ for (sparkVersion in sparkVersions) { val scalaVersions = sparkScalaVersions["scalaVersions"].toString().split(",").map { it.trim() } var first = true for (scalaVersion in scalaVersions) { - val artifactId = "polaris-spark-${sparkVersion}_${scalaVersion}" - polarisProject(artifactId, file("${polarisSparkDir}/v${sparkVersion}")) + val sparkArtifactId = "polaris-spark-${sparkVersion}_${scalaVersion}" + val sparkIntArtifactId = "polaris-spark-integration-${sparkVersion}_${scalaVersion}" + polarisProject( + "polaris-spark-${sparkVersion}_${scalaVersion}", + file("${polarisSparkDir}/v${sparkVersion}/spark"), + ) + polarisProject( + "polaris-spark-integration-${sparkVersion}_${scalaVersion}", + file("${polarisSparkDir}/v${sparkVersion}/integration"), + ) if (first) { first = false } else { - noSourceChecksProjects.add(":$artifactId") + noSourceChecksProjects.add(":$sparkArtifactId") + noSourceChecksProjects.add(":$sparkIntArtifactId") } // Skip all duplicated spark client projects while using Intelij IDE. 
// This is to avoid problems during dependency analysis and sync when From af447f2aeec3c2037f1caedecf85bd5e293d51a7 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 17 Apr 2025 18:13:09 +0100 Subject: [PATCH 04/30] main: Update dependency net.ltgt.gradle:gradle-errorprone-plugin to v4.2.0 (#1392) --- gradle/baselibs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml index f35a940622..f0c8566c28 100644 --- a/gradle/baselibs.versions.toml +++ b/gradle/baselibs.versions.toml @@ -18,7 +18,7 @@ # [libraries] -errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version = "4.1.0" } +errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version = "4.2.0" } idea-ext = { module = "gradle.plugin.org.jetbrains.gradle.plugin.idea-ext:gradle-idea-ext", version = "1.1.10" } license-report = { module = "com.github.jk1:gradle-license-report", version = "2.9" } nexus-publish = { module = "io.github.gradle-nexus:publish-plugin", version = "2.0.0" } From 33ea06beeda263e85c54a334741e3a4e302742be Mon Sep 17 00:00:00 2001 From: gfakbar20 Date: Fri, 18 Apr 2025 00:32:05 +0700 Subject: [PATCH 05/30] Add generic table documentations (#1374) * add generic table documentation (incomplete) * fix table and spacing * remove documentation in client api since there is no implementation yet * remove spacing * minor fix - proof read * review fix, wording * add generic table documentation (incomplete) * fix table and spacing * remove documentation in client api since there is no implementation yet * remove spacing * minor fix - proof read * review fix, wording * proof read - punctuation fix * change table privilege reference --- site/content/in-dev/unreleased/access-control.md | 6 +++--- site/content/in-dev/unreleased/entities.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/site/content/in-dev/unreleased/access-control.md b/site/content/in-dev/unreleased/access-control.md index f59d071fa7..909ccb65d7 100644 --- a/site/content/in-dev/unreleased/access-control.md +++ b/site/content/in-dev/unreleased/access-control.md @@ -117,9 +117,9 @@ To grant the full set of privileges (drop, list, read, write, etc.) on an object | --------- | ----------- | | TABLE_CREATE | Enables registering a table with the catalog. | | TABLE_DROP | Enables dropping a table from the catalog. | -| TABLE_LIST | Enables listing any tables in the catalog. | -| TABLE_READ_PROPERTIES | Enables reading [properties](https://iceberg.apache.org/docs/nightly/configuration/#table-properties) of the table. | -| TABLE_WRITE_PROPERTIES | Enables configuring [properties](https://iceberg.apache.org/docs/nightly/configuration/#table-properties) for the table. | +| TABLE_LIST | Enables listing any table in the catalog. | +| TABLE_READ_PROPERTIES | Enables reading properties of the table. | +| TABLE_WRITE_PROPERTIES | Enables configuring properties for the table. | | TABLE_READ_DATA | Enables reading data from the table by receiving short-lived read-only storage credentials from the catalog. | | TABLE_WRITE_DATA | Enables writing data to the table by receiving short-lived read+write storage credentials from the catalog. | | TABLE_FULL_METADATA | Grants all table privileges, except TABLE_READ_DATA and TABLE_WRITE_DATA, which need to be granted individually. 
| diff --git a/site/content/in-dev/unreleased/entities.md b/site/content/in-dev/unreleased/entities.md index 9b1358b6b4..c0ee241a94 100644 --- a/site/content/in-dev/unreleased/entities.md +++ b/site/content/in-dev/unreleased/entities.md @@ -49,7 +49,7 @@ For information on managing namespaces with the REST API or for more information ## Table -Polaris tables are entities that map to [Apache Iceberg tables](https://iceberg.apache.org/docs/nightly/configuration/). +Polaris tables are entities that map to [Apache Iceberg tables](https://iceberg.apache.org/docs/nightly/configuration/), [Delta tables](https://docs.databricks.com/aws/en/delta/table-properties), or [Hudi tables](https://hudi.apache.org/docs/next/configurations#TABLE_CONFIG). For information on managing tables with the REST API or for more information on what data can be associated with a table, see [the API docs]({{% github-polaris "client/python/docs/CreateTableRequest.md" %}}). From d52854ec9fda8049c8bd9b76c672a35980685f01 Mon Sep 17 00:00:00 2001 From: Liam Bao <90495036+liamzwbao@users.noreply.github.com> Date: Thu, 17 Apr 2025 13:50:12 -0400 Subject: [PATCH 06/30] Unblock test `listNamespacesWithEmptyNamespace` (#1289) * Unblock test `listNamespacesWithEmptyNamespace` * Use `containsExactly` to simplify the test * Fix empty namespace behavior * Address comments * Block dropping empty namespace * Improve error messages --- .../quarkus/catalog/IcebergCatalogTest.java | 55 +++++++++++++++---- .../catalog/iceberg/IcebergCatalog.java | 20 ++++--- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/quarkus/service/src/test/java/org/apache/polaris/service/quarkus/catalog/IcebergCatalogTest.java b/quarkus/service/src/test/java/org/apache/polaris/service/quarkus/catalog/IcebergCatalogTest.java index 4a5506fa22..eb5a88d9e8 100644 --- a/quarkus/service/src/test/java/org/apache/polaris/service/quarkus/catalog/IcebergCatalogTest.java +++ b/quarkus/service/src/test/java/org/apache/polaris/service/quarkus/catalog/IcebergCatalogTest.java @@ -123,11 +123,11 @@ import org.apache.polaris.service.types.NotificationType; import org.apache.polaris.service.types.TableUpdateNotification; import org.assertj.core.api.Assertions; +import org.assertj.core.api.ThrowableAssert.ThrowingCallable; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.params.ParameterizedTest; @@ -164,6 +164,7 @@ public Map getConfigOverrides() { new Schema( required(3, "id", Types.IntegerType.get(), "unique ID 🤪"), required(4, "data", Types.StringType.get())); + private static final String VIEW_QUERY = "select * from ns1.layer1_table"; public static final String CATALOG_NAME = "polaris-catalog"; public static final String TEST_ACCESS_KEY = "test_access_key"; public static final String SECRET_ACCESS_KEY = "secret_access_key"; @@ -386,18 +387,48 @@ public Map purgeRealms(Iterable realms) { }; } - /** TODO: Unblock this test, see: https://github.com/apache/polaris/issues/1272 */ - @Override @Test - @Disabled( - """ - Disabled because the behavior is not applicable to Polaris. - To unblock: - 1) Align Polaris behavior with the superclass by handling empty namespaces the same way, or - 2) Modify this test to expect an exception and add a Polaris-specific version. 
- """) - public void listNamespacesWithEmptyNamespace() { - super.listNamespacesWithEmptyNamespace(); + public void testEmptyNamespace() { + IcebergCatalog catalog = catalog(); + TableIdentifier tableInRootNs = TableIdentifier.of("table"); + String expectedMessage = "Namespace does not exist: ''"; + + ThrowingCallable createEmptyNamespace = () -> catalog.createNamespace(Namespace.empty()); + Assertions.assertThatThrownBy(createEmptyNamespace) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Cannot create root namespace, as it already exists implicitly."); + + ThrowingCallable dropEmptyNamespace = () -> catalog.dropNamespace(Namespace.empty()); + Assertions.assertThatThrownBy(dropEmptyNamespace) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot drop root namespace"); + + ThrowingCallable createTable = () -> catalog.createTable(tableInRootNs, SCHEMA); + Assertions.assertThatThrownBy(createTable) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessageContaining(expectedMessage); + + ThrowingCallable createView = + () -> + catalog + .buildView(tableInRootNs) + .withSchema(SCHEMA) + .withDefaultNamespace(Namespace.empty()) + .withQuery("spark", VIEW_QUERY) + .create(); + Assertions.assertThatThrownBy(createView) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessageContaining(expectedMessage); + + ThrowingCallable listTables = () -> catalog.listTables(Namespace.empty()); + Assertions.assertThatThrownBy(listTables) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessageContaining(expectedMessage); + + ThrowingCallable listViews = () -> catalog.listViews(Namespace.empty()); + Assertions.assertThatThrownBy(listViews) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessageContaining(expectedMessage); } @Test diff --git a/service/common/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalog.java b/service/common/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalog.java index 3f9722f793..f80d4077ab 100644 --- a/service/common/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalog.java +++ b/service/common/src/main/java/org/apache/polaris/service/catalog/iceberg/IcebergCatalog.java @@ -462,9 +462,9 @@ public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { @Override public List listTables(Namespace namespace) { - if (!namespaceExists(namespace) && !namespace.isEmpty()) { + if (!namespaceExists(namespace)) { throw new NoSuchNamespaceException( - "Cannot list tables for namespace. Namespace does not exist: %s", namespace); + "Cannot list tables for namespace. 
Namespace does not exist: '%s'", namespace); } return listTableLike(PolarisEntitySubType.ICEBERG_TABLE, namespace); @@ -633,11 +633,17 @@ private PolarisResolvedPathWrapper getResolvedParentNamespace(Namespace namespac @Override public boolean namespaceExists(Namespace namespace) { - return resolvedEntityView.getResolvedPath(namespace) != null; + return Optional.ofNullable(namespace) + .filter(ns -> !ns.isEmpty()) + .map(resolvedEntityView::getResolvedPath) + .isPresent(); } @Override public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException { + if (namespace.isEmpty()) { + throw new IllegalArgumentException("Cannot drop root namespace"); + } PolarisResolvedPathWrapper resolvedEntities = resolvedEntityView.getResolvedPath(namespace); if (resolvedEntities == null) { return false; @@ -798,9 +804,9 @@ public void close() throws IOException { @Override public List listViews(Namespace namespace) { - if (!namespaceExists(namespace) && !namespace.isEmpty()) { + if (!namespaceExists(namespace)) { throw new NoSuchNamespaceException( - "Cannot list views for namespace. Namespace does not exist: %s", namespace); + "Cannot list views for namespace. Namespace does not exist: '%s'", namespace); } return listTableLike(PolarisEntitySubType.ICEBERG_VIEW, namespace); @@ -1251,7 +1257,7 @@ public void doCommit(TableMetadata base, TableMetadata metadata) { // TODO: Maybe avoid writing metadata if there's definitely a transaction conflict if (null == base && !namespaceExists(tableIdentifier.namespace())) { throw new NoSuchNamespaceException( - "Cannot create table %s. Namespace does not exist: %s", + "Cannot create table '%s'. Namespace does not exist: '%s'", tableIdentifier, tableIdentifier.namespace()); } @@ -1492,7 +1498,7 @@ public void doCommit(ViewMetadata base, ViewMetadata metadata) { LOGGER.debug("doCommit for view {} with base {}, metadata {}", identifier, base, metadata); if (null == base && !namespaceExists(identifier.namespace())) { throw new NoSuchNamespaceException( - "Cannot create view %s. Namespace does not exist: %s", + "Cannot create view '%s'. Namespace does not exist: '%s'", identifier, identifier.namespace()); } From fa791aaa755d54c5a4e84bc4976ab91f55bb765a Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Thu, 17 Apr 2025 10:50:28 -0700 Subject: [PATCH 07/30] Revamp the Quick Start page (#1367) * First Draft with AWS * try again * try again * try again * try again * try again * try now * should work * AWS First Draft Complete * ensure file changed * Azure First Draft Complete * Azure First Draft, pt. 
2 * Azure Completed * GCP First Draft * GCP Verified * File structure fixed * Remove Trino-specific tutorial * Restructured Quick Start * Addresses minor comments from @eric-maynard * Added reference to Deploying Polaris in Production * Fix MD Link Checker --------- Co-authored-by: Adnan Hemani --- getting-started/README.md | 4 +- .../assets/cloud_providers/deploy-aws.sh | 78 ++++ .../assets/cloud_providers/deploy-azure.sh | 40 ++ .../assets/cloud_providers/deploy-gcp.sh | 50 +++ .../assets/eclipselink/persistence.xml | 3 +- .../assets/polaris/create-catalog.sh | 8 +- getting-started/eclipselink/README.md | 22 +- .../docker-compose-bootstrap-db.yml | 32 ++ .../eclipselink/docker-compose-postgres.yml | 45 +++ .../eclipselink/docker-compose.yml | 62 +-- .../trino-config/catalog/iceberg.properties | 2 +- getting-started/trino/README.md | 61 --- .../trino/create-polaris-catalog.sh | 61 --- getting-started/trino/docker-compose.yml | 58 --- site/content/in-dev/unreleased/_index.md | 2 +- .../unreleased/getting-started/_index.md | 25 ++ .../deploying-polaris/_index.md | 27 ++ .../quickstart-deploy-aws.md | 54 +++ .../quickstart-deploy-azure.md | 50 +++ .../quickstart-deploy-gcp.md | 50 +++ .../getting-started/install-dependencies.md | 118 ++++++ .../unreleased/getting-started/quickstart.md | 123 ++++++ .../getting-started/using-polaris.md | 303 +++++++++++++++ site/content/in-dev/unreleased/quickstart.md | 356 ------------------ 24 files changed, 1034 insertions(+), 600 deletions(-) create mode 100644 getting-started/assets/cloud_providers/deploy-aws.sh create mode 100644 getting-started/assets/cloud_providers/deploy-azure.sh create mode 100644 getting-started/assets/cloud_providers/deploy-gcp.sh create mode 100644 getting-started/eclipselink/docker-compose-bootstrap-db.yml create mode 100644 getting-started/eclipselink/docker-compose-postgres.yml rename getting-started/{trino => eclipselink}/trino-config/catalog/iceberg.properties (95%) delete mode 100644 getting-started/trino/README.md delete mode 100644 getting-started/trino/create-polaris-catalog.sh delete mode 100644 getting-started/trino/docker-compose.yml create mode 100644 site/content/in-dev/unreleased/getting-started/_index.md create mode 100644 site/content/in-dev/unreleased/getting-started/deploying-polaris/_index.md create mode 100644 site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md create mode 100644 site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md create mode 100644 site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md create mode 100644 site/content/in-dev/unreleased/getting-started/install-dependencies.md create mode 100644 site/content/in-dev/unreleased/getting-started/quickstart.md create mode 100644 site/content/in-dev/unreleased/getting-started/using-polaris.md delete mode 100644 site/content/in-dev/unreleased/quickstart.md diff --git a/getting-started/README.md b/getting-started/README.md index 4257e2bad0..32a7376509 100644 --- a/getting-started/README.md +++ b/getting-started/README.md @@ -33,12 +33,10 @@ this directory. Each example has detailed instructions. - [Spark](spark): An example that uses an in-memory metastore, automatically bootstrapped, with Apache Spark and a Jupyter notebook. -- [Trino](trino): An example that uses Trino with Polaris. - - [Telemetry](telemetry): An example that includes Prometheus and Jaeger to collect metrics and traces from Apache Polaris. 
This example automatically creates a `polaris_demo` catalog. - [Eclipselink](eclipselink): An example that uses an Eclipselink metastore and a Postgres database. The realm is bootstrapped with the Polaris Admin tool. This example also creates a - `polaris_demo` catalog, and offers the ability to run Spark SQL queries. Finally, it shows how to + `polaris_quickstart` catalog, and offers the ability to run Spark SQL and Trino queries. Finally, it shows how to attach a debugger to the Polaris server. diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh new file mode 100644 index 0000000000..c943eb69b8 --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -0,0 +1,78 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id) + +DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ + --instance-ids $EC2_INSTANCE_ID \ + --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \ + --output json) + +CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") + +CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') + +ALL_SUBNETS=$(aws ec2 describe-subnets \ + --region $CURRENT_REGION \ + --query 'Subnets[*].{SubnetId:SubnetId}' \ + --output json \ + | jq -r '[.[]["SubnetId"]] | join(" ")') + +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8) +SUBNET_GROUP_NAME="polaris-db-subnet-group-$RANDOM_SUFFIX" +INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" + +aws rds create-db-subnet-group \ + --db-subnet-group-name $SUBNET_GROUP_NAME \ + --db-subnet-group-description "Apache Polaris Quickstart DB Subnet Group" \ + --subnet-ids $ALL_SUBNETS + +DB_INSTANCE_INFO=$(aws rds create-db-instance \ + --db-instance-identifier $INSTANCE_NAME \ + --db-instance-class db.t3.micro \ + --engine postgres \ + --master-username postgres \ + --master-user-password postgres \ + --db-name POLARIS \ + --db-subnet-group-name $SUBNET_GROUP_NAME \ + --allocated-storage 10) + +DB_ARN=$(echo $DB_INSTANCE_INFO | jq -r '.["DBInstance"]["DBInstanceArn"]') + +DESCRIBE_DB=$(aws rds describe-db-instances --db-instance-identifier $DB_ARN) + +until echo $DESCRIBE_DB | jq -e '.["DBInstances"][0] | has("Endpoint")'; +do + echo "sleeping 10s to wait for Postgres DB provisioning..." 
+ sleep 10 + DESCRIBE_DB=$(aws rds describe-db-instances --db-instance-identifier $DB_ARN) +done + +POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq -r '"\(.Address):\(.Port)"') + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh new file mode 100644 index 0000000000..76ee85432b --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
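The AWS script above waits for the database by polling `aws rds describe-db-instances` until the `Endpoint` field is populated. An equivalent readiness check can also be performed from the JVM side by retrying a JDBC connection. The sketch below is an illustrative alternative rather than part of the script: it assumes the PostgreSQL JDBC driver is on the classpath and uses a placeholder host.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class WaitForPostgres {
  public static void main(String[] args) throws InterruptedException {
    // Placeholder endpoint -- in the AWS script this value comes from
    // `aws rds describe-db-instances` once the Endpoint field appears.
    String jdbcUrl = "jdbc:postgresql://<postgres-host>:5432/POLARIS";

    for (int attempt = 1; attempt <= 60; attempt++) {
      try (Connection ignored = DriverManager.getConnection(jdbcUrl, "postgres", "postgres")) {
        System.out.println("Postgres is accepting connections");
        return;
      } catch (SQLException e) {
        System.out.println("Postgres not ready yet (attempt " + attempt + "), sleeping 10s...");
        Thread.sleep(10_000);
      }
    }
    throw new IllegalStateException("Postgres did not become available in time");
  }
}
```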
+# + +CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location') +CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName') +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) +INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" + +CREATE_DB_RESPONSE=$(az postgres flexible-server create -l $CURRENT_REGION -g $CURRENT_RESOURCE_GROUP -n $INSTANCE_NAME -u postgres -p postgres -y) + +az postgres flexible-server db create -g $CURRENT_RESOURCE_GROUP -s $INSTANCE_NAME -d POLARIS + +POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host') + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh new file mode 100644 index 0000000000..5da93b5585 --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
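All three deployment scripts rewrite `jakarta.persistence.jdbc.url` in `persistence.xml` to a template of the form `jdbc:postgresql://<host>:5432/{realm}`. The `{realm}` token is a placeholder that is expected to be expanded with the realm identifier (these scripts bootstrap a realm named `POLARIS`), so each realm can map to its own database. A minimal sketch of that expansion, assuming a plain string substitution:

```java
public class RealmJdbcUrl {
  /** Expands the {realm} placeholder used in persistence.xml into a concrete JDBC URL. */
  static String forRealm(String template, String realm) {
    return template.replace("{realm}", realm);
  }

  public static void main(String[] args) {
    String template = "jdbc:postgresql://<postgres-host>:5432/{realm}";
    // The getting-started scripts bootstrap a realm named POLARIS.
    System.out.println(forRealm(template, "POLARIS"));
  }
}
```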
+# + +CURRENT_ZONE=$(curl -H "Metadata-Flavor: Google" "http://169.254.169.254/computeMetadata/v1/instance/zone" | awk -F/ '{print $NF}') +CURRENT_REGION=$(echo $CURRENT_ZONE | sed 's/-[a-z]$//') +VM_INSTANCE_NAME=$(curl -H "Metadata-Flavor: Google" "http://169.254.169.254/computeMetadata/v1/instance/name") +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) +DB_INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" + +INSTANCE_IP=$(gcloud compute instances describe $VM_INSTANCE_NAME --zone=$CURRENT_ZONE --format="get(networkInterfaces[0].accessConfigs[0].natIP)") + + +gcloud sql instances create $DB_INSTANCE_NAME \ + --database-version=POSTGRES_17 \ + --region=$CURRENT_REGION \ + --tier=db-perf-optimized-N-4 \ + --edition=ENTERPRISE_PLUS \ + --root-password=postgres \ + --authorized-networks="$INSTANCE_IP/32" + +gcloud sql databases create POLARIS --instance=$DB_INSTANCE_NAME + +POSTGRES_ADDR=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="get(ipAddresses[0].ipAddress)") + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/getting-started/assets/eclipselink/persistence.xml b/getting-started/assets/eclipselink/persistence.xml index e569a91832..54fb795a39 100644 --- a/getting-started/assets/eclipselink/persistence.xml +++ b/getting-started/assets/eclipselink/persistence.xml @@ -32,8 +32,7 @@ org.apache.polaris.jpa.models.ModelSequenceId NONE - + diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index f069c66376..b7b8e0f51d 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -33,7 +33,7 @@ echo echo "Obtained access token: ${token}" echo -echo Creating a catalog named polaris_demo... +echo Creating a catalog named quickstart_catalog... curl -s -H "Authorization: Bearer ${token}" \ -H 'Accept: application/json' \ @@ -41,16 +41,16 @@ curl -s -H "Authorization: Bearer ${token}" \ http://polaris:8181/api/management/v1/catalogs \ -d '{ "catalog": { - "name": "polaris_demo", + "name": "quickstart_catalog", "type": "INTERNAL", "readOnly": false, "properties": { - "default-base-location": "file:///tmp/polaris/" + "default-base-location": "file:///var/tmp/quickstart_catalog/" }, "storageConfigInfo": { "storageType": "FILE", "allowedLocations": [ - "file:///tmp" + "file:///var/tmp" ] } } diff --git a/getting-started/eclipselink/README.md b/getting-started/eclipselink/README.md index dd6abf7c29..0f41d8da15 100644 --- a/getting-started/eclipselink/README.md +++ b/getting-started/eclipselink/README.md @@ -38,7 +38,7 @@ This example requires `jq` to be installed on your machine. 2. 
Start the docker compose group by running the following command from the root of the repository: ```shell - docker compose -f getting-started/eclipselink/docker-compose.yml up + docker compose -f getting-started/eclipselink/docker-compose-postgres.yml -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up ``` 3. Using spark-sql: attach to the running spark-sql container: @@ -71,5 +71,23 @@ This example requires `jq` to be installed on your machine. ```shell curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" - curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" + curl -v http://127.0.0.1:8181/api/management/v1/catalogs/polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" ``` + +6. Using Trino CLI: To access the Trino CLI, run this command: +``` +docker exec -it eclipselink-trino-1 trino +``` +Note, `trino-trino-1` is the name of the Docker container. + +Example Trino queries: +``` +SHOW CATALOGS; +SHOW SCHEMAS FROM iceberg; +SHOW TABLES FROM iceberg.information_schema; +DESCRIBE iceberg.information_schema.tables; + +CREATE SCHEMA iceberg.tpch; +CREATE TABLE iceberg.tpch.test_polaris AS SELECT 1 x; +SELECT * FROM iceberg.tpch.test_polaris; +``` diff --git a/getting-started/eclipselink/docker-compose-bootstrap-db.yml b/getting-started/eclipselink/docker-compose-bootstrap-db.yml new file mode 100644 index 0000000000..6e1ab80751 --- /dev/null +++ b/getting-started/eclipselink/docker-compose-bootstrap-db.yml @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + polaris-bootstrap: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris-admin-tool:postgres-latest + environment: + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + command: + - "bootstrap" + - "--realm=POLARIS" + - "--credential=POLARIS,root,s3cr3t" \ No newline at end of file diff --git a/getting-started/eclipselink/docker-compose-postgres.yml b/getting-started/eclipselink/docker-compose-postgres.yml new file mode 100644 index 0000000000..1e86a8c0ea --- /dev/null +++ b/getting-started/eclipselink/docker-compose-postgres.yml @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + postgres: + image: postgres:17.4 + ports: + - "5432:5432" + # set shared memory limit when using docker-compose + shm_size: 128mb + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: POLARIS + POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" + volumes: + # Bind local conf file to a convenient location in the container + - type: bind + source: ./postgresql.conf + target: /etc/postgresql/postgresql.conf + command: + - "postgres" + - "-c" + - "config_file=/etc/postgresql/postgresql.conf" + healthcheck: + test: "pg_isready -U postgres" + interval: 5s + timeout: 2s + retries: 15 \ No newline at end of file diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 252ea53d64..4861675991 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -29,11 +29,6 @@ services: - "8182:8182" # Optional, allows attaching a debugger to the Polaris JVM - "5005:5005" - depends_on: - polaris-bootstrap: - condition: service_completed_successfully - postgres: - condition: service_healthy environment: JAVA_DEBUG: "true" JAVA_DEBUG_PORT: "*:5005" @@ -48,22 +43,7 @@ services: interval: 2s timeout: 10s retries: 10 - - polaris-bootstrap: - # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions - image: apache/polaris-admin-tool:postgres-latest - depends_on: - postgres: - condition: service_healthy - environment: - polaris.persistence.type: eclipse-link - polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml - volumes: - - ../assets/eclipselink/:/deployments/config/eclipselink - command: - - "bootstrap" - - "--realm=POLARIS" - - "--credential=POLARIS,root,s3cr3t" + start_period: 10s polaris-setup: image: alpine/curl @@ -74,37 +54,8 @@ services: - ../assets/polaris/:/polaris entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' - postgres: - image: postgres:17.4 - ports: - - "5432:5432" - # set shared memory limit when using docker-compose - shm_size: 128mb - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: POLARIS - POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" - volumes: - # Bind local conf file to a convenient location in the container - - type: bind - source: ./postgresql.conf - target: /etc/postgresql/postgresql.conf - command: - - "postgres" - - "-c" - - "config_file=/etc/postgresql/postgresql.conf" - healthcheck: - test: "pg_isready -U postgres" - interval: 5s - timeout: 2s - retries: 15 - spark-sql: image: apache/spark:3.5.5-java17-python3 - depends_on: - polaris-setup: - condition: service_completed_successfully stdin_open: true tty: true ports: @@ -119,7 +70,7 @@ services: --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", --conf, 
"spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", --conf, "spark.sql.catalog.polaris.type=rest", - --conf, "spark.sql.catalog.polaris.warehouse=polaris_demo", + --conf, "spark.sql.catalog.polaris.warehouse=quickstart_catalog", --conf, "spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog", --conf, "spark.sql.catalog.polaris.credential=root:s3cr3t", --conf, "spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL", @@ -127,3 +78,12 @@ services: --conf, "spark.sql.catalogImplementation=in-memory", --conf, "spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp" ] + + trino: + image: trinodb/trino:latest + stdin_open: true + tty: true + ports: + - "8080:8080" + volumes: + - ./trino-config/catalog:/etc/trino/catalog diff --git a/getting-started/trino/trino-config/catalog/iceberg.properties b/getting-started/eclipselink/trino-config/catalog/iceberg.properties similarity index 95% rename from getting-started/trino/trino-config/catalog/iceberg.properties rename to getting-started/eclipselink/trino-config/catalog/iceberg.properties index 1cd0a0e7a2..28c3c61faa 100644 --- a/getting-started/trino/trino-config/catalog/iceberg.properties +++ b/getting-started/eclipselink/trino-config/catalog/iceberg.properties @@ -23,6 +23,6 @@ iceberg.rest-catalog.uri=http://polaris:8181/api/catalog iceberg.rest-catalog.security=OAUTH2 iceberg.rest-catalog.oauth2.credential=root:s3cr3t iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL -iceberg.rest-catalog.warehouse=polaris +iceberg.rest-catalog.warehouse=quickstart_catalog # Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration fs.hadoop.enabled=true diff --git a/getting-started/trino/README.md b/getting-started/trino/README.md deleted file mode 100644 index 6b6acf1ef4..0000000000 --- a/getting-started/trino/README.md +++ /dev/null @@ -1,61 +0,0 @@ - - -# Getting Started with Trino and Apache Polaris - -This getting started guide provides a `docker-compose` file to set up [Trino](https://trino.io/) with Apache Polaris. Apache Polaris is configured as an Iceberg REST Catalog in Trino. - -## Build Polaris Image -Build Polaris Image while Docker is running -``` -./gradlew \ - :polaris-quarkus-server:assemble \ - :polaris-quarkus-server:quarkusAppPartsBuild --rerun \ - -Dquarkus.container-image.build=true -``` - -## Run the `docker-compose` file -To start the `docker-compose` file, run this command from the repo's root directory: -``` -docker-compose -f getting-started/trino/docker-compose.yml up -``` - -## Run Trino queries via Trino CLI -To access the Trino CLI, run this command: -``` -docker exec -it trino-trino-1 trino -``` -Note, `trino-trino-1` is the name of the Docker container. - -Example Trino queries: -``` -SHOW CATALOGS; -SHOW SCHEMAS FROM iceberg; -SHOW TABLES FROM iceberg.information_schema; -DESCRIBE iceberg.information_schema.tables; - -CREATE SCHEMA iceberg.tpch; -CREATE TABLE iceberg.tpch.test_polaris AS SELECT 1 x; -SELECT * FROM iceberg.tpch.test_polaris; -``` - -## Note -The Polaris in this example is started with realm `default-realm` and root credentials: `root:s3cr3t`. - -An example catalog is created in Apache Polaris using the `curl` command. See `create-polaris-catalog.sh` for details. 
diff --git a/getting-started/trino/create-polaris-catalog.sh b/getting-started/trino/create-polaris-catalog.sh deleted file mode 100644 index e08e1c5f88..0000000000 --- a/getting-started/trino/create-polaris-catalog.sh +++ /dev/null @@ -1,61 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -if ! output=$(curl -X POST -H "Polaris-Realm: default-realm" "http://polaris:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=s3cr3t" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi - -token=$(echo "$output" | awk -F\" '{print $4}') - -if [ "$token" == "unauthorized_client" ]; then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi - -PRINCIPAL_TOKEN=$token - -# Use local filesystem by default -curl -i -X POST -H "Authorization: Bearer $PRINCIPAL_TOKEN" -H 'Accept: application/json' -H 'Content-Type: application/json' \ - http://polaris:8181/api/management/v1/catalogs \ - -d '{ - "catalog": { - "name": "polaris", - "type": "INTERNAL", - "readOnly": false, - "properties": { - "default-base-location": "file:///tmp/polaris/" - }, - "storageConfigInfo": { - "storageType": "FILE", - "allowedLocations": [ - "file:///tmp" - ] - } - } - }' - -# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata -curl -i -X PUT -H "Authorization: Bearer $PRINCIPAL_TOKEN" -H 'Accept: application/json' -H 'Content-Type: application/json' \ - http://polaris:8181/api/management/v1/catalogs/polaris/catalog-roles/catalog_admin/grants \ - -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' diff --git a/getting-started/trino/docker-compose.yml b/getting-started/trino/docker-compose.yml deleted file mode 100644 index fd438f0094..0000000000 --- a/getting-started/trino/docker-compose.yml +++ /dev/null @@ -1,58 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
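The deleted script above fetches a bearer token with `curl` before creating the catalog. For reference, the same token request can be issued from Java with the JDK's `HttpClient`. This is a sketch only: the host and realm below are the defaults used throughout these getting-started examples and should be adjusted for other deployments.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class PolarisToken {
  public static void main(String[] args) throws Exception {
    // Matches the bootstrap credentials used in this guide (root/s3cr3t, realm POLARIS).
    String form =
        "grant_type=client_credentials"
            + "&client_id=root"
            + "&client_secret=s3cr3t"
            + "&scope=PRINCIPAL_ROLE:ALL";

    HttpRequest request =
        HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:8181/api/catalog/v1/oauth/tokens"))
            .header("Content-Type", "application/x-www-form-urlencoded")
            .header("Polaris-Realm", "POLARIS")
            .POST(HttpRequest.BodyPublishers.ofString(form))
            .build();

    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    // The response body is a JSON document containing an "access_token" field.
    System.out.println(response.body());
  }
}
```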
-# - -services: - polaris: - image: apache/polaris:latest - ports: - - "8181:8181" - - "8182" - environment: - AWS_REGION: us-west-2 - AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY - GOOGLE_APPLICATION_CREDENTIALS: $GOOGLE_APPLICATION_CREDENTIALS - AZURE_TENANT_ID: $AZURE_TENANT_ID - AZURE_CLIENT_ID: $AZURE_CLIENT_ID - AZURE_CLIENT_SECRET: $AZURE_CLIENT_SECRET - POLARIS_BOOTSTRAP_CREDENTIALS: default-realm,root,s3cr3t - polaris.realm-context.realms: default-realm - quarkus.otel.sdk.disabled: "true" - - healthcheck: - test: ["CMD", "curl", "http://localhost:8182/healthcheck"] - interval: 10s - timeout: 10s - retries: 5 - - create-polaris-catalog: - image: curlimages/curl - depends_on: - polaris: - condition: service_healthy - volumes: - - ./create-polaris-catalog.sh:/create-polaris-catalog.sh - command: ["/bin/sh", "/create-polaris-catalog.sh"] - - trino: - image: trinodb/trino:latest - ports: - - "8080:8080" - volumes: - - ./trino-config/catalog:/etc/trino/catalog diff --git a/site/content/in-dev/unreleased/_index.md b/site/content/in-dev/unreleased/_index.md index 19100e7123..b4e147ecda 100644 --- a/site/content/in-dev/unreleased/_index.md +++ b/site/content/in-dev/unreleased/_index.md @@ -37,7 +37,7 @@ These pages refer to the current state of the main branch, which is still under Functionalities can be changed, removed or added without prior notice. {{< /alert >}} -Check out the [Quick Start]({{% ref "quickstart" %}}) page to get started. +Check out the [Quickstart]({{% ref "getting-started" %}}) page to get started.
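To tie the pieces together: the catalog bootstrap that `create-catalog.sh` performs with `curl` earlier in this patch, plus the `TABLE_WRITE_DATA` grant from the removed Trino script, can equally be driven from Java. The sketch below is illustrative only — the token and host are placeholders, and the payloads simply mirror the JSON used by the shell scripts.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CreateQuickstartCatalog {
  public static void main(String[] args) throws Exception {
    String token = "<access-token>"; // obtained from /api/catalog/v1/oauth/tokens
    String managementApi = "http://localhost:8181/api/management/v1";
    HttpClient client = HttpClient.newHttpClient();

    // Same payload as getting-started/assets/polaris/create-catalog.sh: a FILE-backed catalog.
    String catalogJson =
        "{\"catalog\": {\"name\": \"quickstart_catalog\", \"type\": \"INTERNAL\","
            + " \"readOnly\": false,"
            + " \"properties\": {\"default-base-location\": \"file:///var/tmp/quickstart_catalog/\"},"
            + " \"storageConfigInfo\": {\"storageType\": \"FILE\","
            + " \"allowedLocations\": [\"file:///var/tmp\"]}}}";
    HttpRequest createCatalog =
        HttpRequest.newBuilder()
            .uri(URI.create(managementApi + "/catalogs"))
            .header("Authorization", "Bearer " + token)
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(catalogJson))
            .build();
    HttpResponse<String> created =
        client.send(createCatalog, HttpResponse.BodyHandlers.ofString());
    System.out.println("create catalog -> HTTP " + created.statusCode());

    // Same grant as the removed Trino script: give catalog_admin the TABLE_WRITE_DATA privilege.
    String grantJson = "{\"type\": \"catalog\", \"privilege\": \"TABLE_WRITE_DATA\"}";
    HttpRequest addGrant =
        HttpRequest.newBuilder()
            .uri(
                URI.create(
                    managementApi
                        + "/catalogs/quickstart_catalog/catalog-roles/catalog_admin/grants"))
            .header("Authorization", "Bearer " + token)
            .header("Content-Type", "application/json")
            .PUT(HttpRequest.BodyPublishers.ofString(grantJson))
            .build();
    HttpResponse<String> granted = client.send(addGrant, HttpResponse.BodyHandlers.ofString());
    System.out.println("add grant -> HTTP " + granted.statusCode());
  }
}
```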