From f56dd77a181742de3267872873150c628d7d4ba0 Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Thu, 23 Jan 2025 17:55:11 +0100 Subject: [PATCH 1/7] Add docker/podman compose examples to get started with Polaris --- README.md | 3 + docker/README.md | 25 +++++ docker/assets/eclipselink/persistence.xml | 43 ++++++++ docker/assets/polaris/create-catalog.sh | 62 +++++++++++ docker/assets/prometheus/prometheus.yml | 30 ++++++ docker/elipselink/README.md | 51 +++++++++ docker/elipselink/docker-compose.yml | 121 ++++++++++++++++++++++ docker/in-memory/README.md | 30 ++++++ docker/in-memory/docker-compose.yml | 38 +++++++ docker/telemetry/README.md | 36 +++++++ docker/telemetry/docker-compose.yml | 82 +++++++++++++++ 11 files changed, 521 insertions(+) create mode 100644 docker/README.md create mode 100644 docker/assets/eclipselink/persistence.xml create mode 100755 docker/assets/polaris/create-catalog.sh create mode 100644 docker/assets/prometheus/prometheus.yml create mode 100644 docker/elipselink/README.md create mode 100644 docker/elipselink/docker-compose.yml create mode 100644 docker/in-memory/README.md create mode 100644 docker/in-memory/docker-compose.yml create mode 100644 docker/telemetry/README.md create mode 100644 docker/telemetry/docker-compose.yml diff --git a/README.md b/README.md index 1e1e9ed54a..abcb748f88 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,9 @@ select * from db1.table1; build the image locally. - `docker run -p 8181:8181 -p 8182:8182 apache/polaris:latest` - To run the image. +The Polaris codebase contains some docker compose examples to quickly get started with Polaris, +using different configurations. Check the `./docker` directory for more information. + #### Running in Kubernetes - `./run.sh` - To run Polaris as a mini-deployment locally. This will create a Kind cluster, diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000000..9b5f9d7202 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,25 @@ +# Podman/Docker Compose Examples + +You can quickly get started with Polaris by playing with the compose examples provided in this +directory. The examples are designed to be run with `docker-compose` or `podman-compose`. Each +example has detailed instructions. + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) or [Podman](https://podman.io/docs/installation) +- [Docker Compose](https://docs.docker.com/compose/install/) + or [Podman Compose](https://docs.podman.io/en/v5.1.1/markdown/podman-compose.1.html) +- [jq](https://stedolan.github.io/jq/download/) (for some examples) + +## Examples + +- [In-Memory](./in-memory): A simple example that uses an in-memory metastore, automatically + bootstrapped. + +- [Telemetry](./telemetry): An example that includes Prometheus and Jaeger to collect metrics and + traces from Polaris. This example automatically creates a `polaris_demo` catalog. + +- [Eclipselink](./elipselink): An example that uses an Eclipselink metastore and a Postgres + database. The realm is bootstrapped with the Polaris Admin tool. This example also creates a + `polaris_demo` catalog, and offers the ability to run Spark SQL queries. Finally, it shows how to + attach a debugger to the Polaris server. 
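+
+## Running the Examples
+
+All examples share the same lifecycle. As a minimal sketch (using the `in-memory` example as a
+stand-in; the same commands apply to the other directories listed above, and `podman compose` can
+be substituted for `docker compose`):
+
+```shell
+# Start the example in the foreground, from the repository root
+docker compose -f docker/in-memory/docker-compose.yml up
+
+# When you are done, stop the example and remove its containers
+docker compose -f docker/in-memory/docker-compose.yml down
+```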
diff --git a/docker/assets/eclipselink/persistence.xml b/docker/assets/eclipselink/persistence.xml
new file mode 100644
index 0000000000..38c3676ded
--- /dev/null
+++ b/docker/assets/eclipselink/persistence.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<persistence version="2.0" xmlns="http://java.sun.com/xml/ns/persistence"
+             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+             xsi:schemaLocation="http://java.sun.com/xml/ns/persistence
+             http://java.sun.com/xml/ns/persistence/persistence_2_0.xsd">
+  <persistence-unit name="polaris" transaction-type="RESOURCE_LOCAL">
+    <provider>org.eclipse.persistence.jpa.PersistenceProvider</provider>
+    <class>org.apache.polaris.jpa.models.ModelEntity</class>
+    <class>org.apache.polaris.jpa.models.ModelEntityActive</class>
+    <class>org.apache.polaris.jpa.models.ModelEntityChangeTracking</class>
+    <class>org.apache.polaris.jpa.models.ModelEntityDropped</class>
+    <class>org.apache.polaris.jpa.models.ModelGrantRecord</class>
+    <class>org.apache.polaris.jpa.models.ModelPrincipalSecrets</class>
+    <class>org.apache.polaris.jpa.models.ModelSequenceId</class>
+    <shared-cache-mode>NONE</shared-cache-mode>
+    <properties>
+      <property name="jakarta.persistence.jdbc.url"
+                value="jdbc:postgresql://postgres:5432/{realm}"/>
+      <property name="jakarta.persistence.jdbc.user" value="postgres"/>
+      <property name="jakarta.persistence.jdbc.password" value="postgres"/>
+      <property name="eclipselink.persistence-context.flush-mode" value="auto"/>
+    </properties>
+  </persistence-unit>
+</persistence>
\ No newline at end of file
diff --git a/docker/assets/polaris/create-catalog.sh b/docker/assets/polaris/create-catalog.sh
new file mode 100755
index 0000000000..961a0d2163
--- /dev/null
+++ b/docker/assets/polaris/create-catalog.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
+  --user root:s3cr3t \
+  -d grant_type=client_credentials \
+  -d scope=PRINCIPAL_ROLE:ALL | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p')
+
+if [ -z "${token}" ]; then
+  echo "Failed to obtain access token."
+  exit 1
+fi
+
+echo
+echo "Obtained access token: ${token}"
+
+echo
+echo Creating a catalog named polaris_demo...
+
+curl -s -H "Authorization: Bearer ${token}" \
+  -H 'Accept: application/json' \
+  -H 'Content-Type: application/json' \
+  http://polaris:8181/api/management/v1/catalogs \
+  -d '{
+       "catalog": {
+         "name": "polaris_demo",
+         "type": "INTERNAL",
+         "readOnly": false,
+         "properties": {
+           "default-base-location": "file:///tmp/polaris/"
+         },
+         "storageConfigInfo": {
+           "storageType": "FILE",
+           "allowedLocations": [
+             "file:///tmp"
+           ]
+         }
+       }
+     }'
+
+echo
+echo Done.
\ No newline at end of file
diff --git a/docker/assets/prometheus/prometheus.yml b/docker/assets/prometheus/prometheus.yml
new file mode 100644
index 0000000000..99c779ba73
--- /dev/null
+++ b/docker/assets/prometheus/prometheus.yml
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+scrape_configs:
+  - job_name: 'polaris'
+    scrape_interval: 5s
+    metrics_path: /q/metrics
+    static_configs:
+      - targets: ['polaris:8182']
+        labels:
+          service: polaris
diff --git a/docker/elipselink/README.md b/docker/elipselink/README.md
new file mode 100644
index 0000000000..a1eba96103
--- /dev/null
+++ b/docker/elipselink/README.md
@@ -0,0 +1,51 @@
+# Polaris with EclipseLink, Postgres and Spark SQL
+
+1. If such an image is not already present, build the Polaris image with support for EclipseLink and
+   the Postgres JDBC driver:
+
+   ```shell
+   ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
+     -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \
+     -Dquarkus.container-image.build=true
+   ```
+
+2. Start the docker compose group using either podman or docker:
+
+   ```shell
+   podman compose -f docker/eclipselink/docker-compose.yml up
+   docker compose -f docker/eclipselink/docker-compose.yml up
+   ```
+
+3. Using spark-sql: attach to the running spark-sql container:
+
+   ```shell
+   podman attach $(podman ps -q --filter name=spark-sql)
+   docker attach $(docker ps -q --filter name=spark-sql)
+   ```
+
+   You may not see Spark's prompt immediately, type ENTER to see it. A few commands that you can try:
+
+   ```sql
+   CREATE NAMESPACE polaris.ns1;
+   USE polaris.ns1;
+   CREATE TABLE table1 (id int, name string);
+   INSERT INTO table1 VALUES (1, 'a');
+   SELECT * FROM table1;
+   ```
+
+4. To access Polaris from the host machine, first request an access token:
+
+   ```shell
+   export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
+      --resolve polaris:8181:127.0.0.1 \
+      --user root:s3cr3t \
+      -d 'grant_type=client_credentials' \
+      -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token)
+   ```
+
+5. Then, use the access token in the Authorization header when accessing Polaris:
+
+   ```shell
+   curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN"
+   curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN"
+   ```
diff --git a/docker/elipselink/docker-compose.yml b/docker/elipselink/docker-compose.yml
new file mode 100644
index 0000000000..4940285934
--- /dev/null
+++ b/docker/elipselink/docker-compose.yml
@@ -0,0 +1,121 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# + +services: + + polaris: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Management port (metrics and health checks) + - "8182:8182" + # Optional, allows attaching a debugger to the Polaris JVM + - "5005:5005" + depends_on: + polaris-bootstrap: + condition: service_completed_successfully + postgres: + condition: service_healthy + environment: + JAVA_DEBUG: "true" + JAVA_DEBUG_PORT: "*:5005" + quarkus.otel.exporter.otlp.endpoint: http://jaeger:4317 + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + polaris.realm-context.realms: POLARIS + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + + polaris-bootstrap: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris-admin-tool:latest + depends_on: + postgres: + condition: service_healthy + environment: + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + command: + - "bootstrap" + - "--realm=POLARIS" + - "--credential=POLARIS,root,s3cr3t" + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/polaris/:/polaris + entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' + + postgres: + image: postgres:17.2 + ports: + - "5432:5432" + # set shared memory limit when using docker-compose + shm_size: 128mb + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: POLARIS + POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" + healthcheck: + test: "pg_isready -U postgres" + interval: 5s + timeout: 2s + retries: 15 + + spark-sql: + image: apache/spark:3.5.4-java17-python3 + depends_on: + polaris-setup: + condition: service_completed_successfully + stdin_open: true + tty: true + ports: + - "4040-4045:4040-4045" + healthcheck: + test: "curl localhost:4040" + interval: 5s + retries: 15 + command: [ + /opt/spark/bin/spark-sql, + --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17", + --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", + --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", + --conf, "spark.sql.catalog.polaris.type=rest", + --conf, "spark.sql.catalog.polaris.warehouse=polaris_demo", + --conf, "spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog", + --conf, "spark.sql.catalog.polaris.credential=root:s3cr3t", + --conf, "spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL", + --conf, "spark.sql.defaultCatalog=polaris", + --conf, "spark.sql.catalogImplementation=in-memory", + ] + volumes: + - ~/.ivy2:/home/spark/.ivy2 diff --git a/docker/in-memory/README.md b/docker/in-memory/README.md new file mode 100644 index 0000000000..1d463ec127 --- /dev/null +++ b/docker/in-memory/README.md @@ -0,0 +1,30 @@ +# Polaris In-Memory + +1. 
Build the Polaris image if it's not already present locally: + + ```shell + ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true + ``` + +2. Start the docker compose group using either podman or docker: + + ```shell + podman compose -f docker/in-memory/docker-compose.yml up + docker compose -f docker/in-memory/docker-compose.yml up + ``` + +3. To access Polaris from the host machine, first request an access token: + + ```shell + export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --resolve polaris:8181:127.0.0.1 \ + --user root:s3cr3t \ + -d 'grant_type=client_credentials' \ + -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) + ``` + +4. Then, use the access token in the Authorization header when accessing Polaris: + + ```shell + curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" + ``` diff --git a/docker/in-memory/docker-compose.yml b/docker/in-memory/docker-compose.yml new file mode 100644 index 0000000000..5b9839da62 --- /dev/null +++ b/docker/in-memory/docker-compose.yml @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + polaris: + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Management port (metrics and health checks) + - "8182:8182" + environment: + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + polaris.persistence.type: in-memory + polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 5 + diff --git a/docker/telemetry/README.md b/docker/telemetry/README.md new file mode 100644 index 0000000000..31bba2ea1a --- /dev/null +++ b/docker/telemetry/README.md @@ -0,0 +1,36 @@ +# Polaris with Prometheus and Jaeger + +1. Build the Polaris image if it's not already present locally: + + ```shell + ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true + ``` + +2. Start the docker compose group using either podman or docker: + + ```shell + podman compose -f docker/telemetry/docker-compose.yml up + docker compose -f docker/telemetry/docker-compose.yml up + ``` + +3. To access Polaris from the host machine, first request an access token: + + ```shell + export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --resolve polaris:8181:127.0.0.1 \ + --user root:s3cr3t \ + -d 'grant_type=client_credentials' \ + -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) + ``` + +4. 
Then, use the access token in the Authorization header when accessing Polaris: + + ```shell + curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" + curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" + ``` + +5. Access the following services: + + - Prometheus UI: browse to http://localhost:9093 to view metrics. + - Jaeger UI: browse to http://localhost:16686 to view traces. diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml new file mode 100644 index 0000000000..17cef58464 --- /dev/null +++ b/docker/telemetry/docker-compose.yml @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + + polaris: + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Management port (metrics and health checks) + - "8182:8182" + depends_on: + jaeger: + condition: service_healthy + environment: + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + quarkus.otel.exporter.otlp.endpoint: http://jaeger:4317 + polaris.realm-context.realms: POLARIS + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/polaris/:/polaris + entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' + + prometheus: + image: docker.io/prom/prometheus:v3.1.0 + ports: + - "9093:9090" + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/prometheus/:/etc/prometheus/ + command: + - --config.file=/etc/prometheus/prometheus.yml + healthcheck: + test: "wget -O /dev/null -o /dev/null http://localhost:9090" + interval: 5s + timeout: 2s + retries: 15 + + # Jaeger (OpenTelemetry traces collector) + jaeger: + image: docker.io/jaegertracing/all-in-one:1.65.0 + ports: + # Jaeger gRPC collector, used by Polaris + - "4317:4317" + # Jaeger UI + - "16686:16686" + environment: + - COLLECTOR_OTLP_ENABLED=true + healthcheck: + test: "echo -e 'GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n' | nc localhost 16686 | grep -q '200 OK'" + interval: 5s + timeout: 2s + retries: 15 From b96819fc581a4704cc7776630fe487e0bc54f5d1 Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Thu, 23 Jan 2025 21:23:49 +0100 Subject: [PATCH 2/7] review --- docker/elipselink/README.md | 5 +++++ docker/elipselink/docker-compose.yml | 4 ++-- docker/in-memory/README.md | 2 ++ docker/telemetry/README.md | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docker/elipselink/README.md b/docker/elipselink/README.md index a1eba96103..3b64643b38 100644 --- a/docker/elipselink/README.md 
+++ b/docker/elipselink/README.md @@ -6,6 +6,7 @@ ```shell ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ -Dquarkus.container-image.build=true ``` @@ -13,6 +14,8 @@ ```shell podman compose -f docker/eclipselink/docker-compose.yml up + ``` + ```shell docker compose -f docker/eclipselink/docker-compose.yml up ``` @@ -20,6 +23,8 @@ ```shell podman attach $(podman ps -q --filter name=spark-sql) + ``` + ```shell docker attach $(docker ps -q --filter name=spark-sql) ``` diff --git a/docker/elipselink/docker-compose.yml b/docker/elipselink/docker-compose.yml index 4940285934..d59a45d5d4 100644 --- a/docker/elipselink/docker-compose.yml +++ b/docker/elipselink/docker-compose.yml @@ -21,7 +21,7 @@ services: polaris: # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions - image: apache/polaris:latest + image: apache/polaris:postgres-latest ports: # API port - "8181:8181" @@ -51,7 +51,7 @@ services: polaris-bootstrap: # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions - image: apache/polaris-admin-tool:latest + image: apache/polaris-admin-tool:postgres-latest depends_on: postgres: condition: service_healthy diff --git a/docker/in-memory/README.md b/docker/in-memory/README.md index 1d463ec127..6e1ebcae34 100644 --- a/docker/in-memory/README.md +++ b/docker/in-memory/README.md @@ -10,6 +10,8 @@ ```shell podman compose -f docker/in-memory/docker-compose.yml up + ``` + ```shell docker compose -f docker/in-memory/docker-compose.yml up ``` diff --git a/docker/telemetry/README.md b/docker/telemetry/README.md index 31bba2ea1a..85cb094909 100644 --- a/docker/telemetry/README.md +++ b/docker/telemetry/README.md @@ -10,6 +10,8 @@ ```shell podman compose -f docker/telemetry/docker-compose.yml up + ``` + ```shell docker compose -f docker/telemetry/docker-compose.yml up ``` From 11ecee41eb34f0467242cd70e7b8658e015f676d Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Thu, 23 Jan 2025 21:23:58 +0100 Subject: [PATCH 3/7] fix bash not found --- docker/assets/polaris/create-catalog.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/assets/polaris/create-catalog.sh b/docker/assets/polaris/create-catalog.sh index 961a0d2163..f069c66376 100755 --- a/docker/assets/polaris/create-catalog.sh +++ b/docker/assets/polaris/create-catalog.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file From 13bc2133225c57692e6efed7a03c00ba4a513454 Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Thu, 23 Jan 2025 21:45:29 +0100 Subject: [PATCH 4/7] polishing --- docker/elipselink/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/elipselink/docker-compose.yml b/docker/elipselink/docker-compose.yml index d59a45d5d4..d9b5abc2d1 100644 --- a/docker/elipselink/docker-compose.yml +++ b/docker/elipselink/docker-compose.yml @@ -37,10 +37,10 @@ services: environment: JAVA_DEBUG: "true" JAVA_DEBUG_PORT: "*:5005" - quarkus.otel.exporter.otlp.endpoint: http://jaeger:4317 polaris.persistence.type: eclipse-link polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" volumes: - ../assets/eclipselink/:/deployments/config/eclipselink healthcheck: From f2b4d2223cdafe4bb10541b088bfe26f52376c80 Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Sat, 25 Jan 2025 13:20:11 +0100 Subject: [PATCH 5/7] move things to getting-started folder + review --- docker/README.md | 25 -- docker/elipselink/README.md | 56 ---- docker/in-memory/README.md | 32 -- docker/in-memory/docker-compose.yml | 38 --- docker/telemetry/README.md | 38 --- getting-started/README.md | 44 +++ .../assets/eclipselink/persistence.xml | 0 .../assets/polaris/create-catalog.sh | 0 .../assets/prometheus/prometheus.yml | 0 getting-started/elipselink/README.md | 71 +++++ .../elipselink/docker-compose.yml | 0 getting-started/spark/README.md | 16 +- getting-started/spark/docker-compose.yml | 8 +- .../spark/notebooks/SparkPolaris.ipynb | 300 ++++++++++++++++-- getting-started/telemetry/README.md | 61 ++++ .../telemetry/docker-compose.yml | 11 + 16 files changed, 471 insertions(+), 229 deletions(-) delete mode 100644 docker/README.md delete mode 100644 docker/elipselink/README.md delete mode 100644 docker/in-memory/README.md delete mode 100644 docker/in-memory/docker-compose.yml delete mode 100644 docker/telemetry/README.md create mode 100644 getting-started/README.md rename {docker => getting-started}/assets/eclipselink/persistence.xml (100%) rename {docker => getting-started}/assets/polaris/create-catalog.sh (100%) rename {docker => getting-started}/assets/prometheus/prometheus.yml (100%) create mode 100644 getting-started/elipselink/README.md rename {docker => getting-started}/elipselink/docker-compose.yml (100%) create mode 100644 getting-started/telemetry/README.md rename {docker => getting-started}/telemetry/docker-compose.yml (76%) diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 9b5f9d7202..0000000000 --- a/docker/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Podman/Docker Compose Examples - -You can quickly get started with Polaris by playing with the compose examples provided in this -directory. The examples are designed to be run with `docker-compose` or `podman-compose`. Each -example has detailed instructions. - -## Prerequisites - -- [Docker](https://docs.docker.com/get-docker/) or [Podman](https://podman.io/docs/installation) -- [Docker Compose](https://docs.docker.com/compose/install/) - or [Podman Compose](https://docs.podman.io/en/v5.1.1/markdown/podman-compose.1.html) -- [jq](https://stedolan.github.io/jq/download/) (for some examples) - -## Examples - -- [In-Memory](./in-memory): A simple example that uses an in-memory metastore, automatically - bootstrapped. 
- -- [Telemetry](./telemetry): An example that includes Prometheus and Jaeger to collect metrics and - traces from Polaris. This example automatically creates a `polaris_demo` catalog. - -- [Eclipselink](./elipselink): An example that uses an Eclipselink metastore and a Postgres - database. The realm is bootstrapped with the Polaris Admin tool. This example also creates a - `polaris_demo` catalog, and offers the ability to run Spark SQL queries. Finally, it shows how to - attach a debugger to the Polaris server. diff --git a/docker/elipselink/README.md b/docker/elipselink/README.md deleted file mode 100644 index 3b64643b38..0000000000 --- a/docker/elipselink/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Polaris with EclipseLink, Postgres and Spark SQL - -1. If such an image is not already present, build the Polaris image with support for EclipseLink and - the Postgres JDBC driver: - - ```shell - ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ - -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ - -Dquarkus.container-image.tag=postgres-latest \ - -Dquarkus.container-image.build=true - ``` - -2. Start the docker compose group using either podman or docker: - - ```shell - podman compose -f docker/eclipselink/docker-compose.yml up - ``` - ```shell - docker compose -f docker/eclipselink/docker-compose.yml up - ``` - -3. Using spark-sql: attach to the running spark-sql container: - - ```shell - podman attach $(podman ps -q --filter name=spark-sql) - ``` - ```shell - docker attach $(docker ps -q --filter name=spark-sql) - ``` - - You may not see Spark's prompt immediately, type ENTER to see it. A few commands that you can try: - - ```sql - CREATE NAMESPACE polaris.ns1; - USE polaris.ns1; - CREATE TABLE table1 (id int, name string); - INSERT INTO table1 VALUES (1, 'a'); - SELECT * FROM table1; - ``` - -4. To access Polaris from the host machine, first request an access token: - - ```shell - export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ - --resolve polaris:8181:127.0.0.1 \ - --user root:s3cr3t \ - -d 'grant_type=client_credentials' \ - -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) - ``` - -5. Then, use the access token in the Authorization header when accessing Polaris: - - ```shell - curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" - curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" - ``` diff --git a/docker/in-memory/README.md b/docker/in-memory/README.md deleted file mode 100644 index 6e1ebcae34..0000000000 --- a/docker/in-memory/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Polaris In-Memory - -1. Build the Polaris image if it's not already present locally: - - ```shell - ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true - ``` - -2. Start the docker compose group using either podman or docker: - - ```shell - podman compose -f docker/in-memory/docker-compose.yml up - ``` - ```shell - docker compose -f docker/in-memory/docker-compose.yml up - ``` - -3. To access Polaris from the host machine, first request an access token: - - ```shell - export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ - --resolve polaris:8181:127.0.0.1 \ - --user root:s3cr3t \ - -d 'grant_type=client_credentials' \ - -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) - ``` - -4. 
Then, use the access token in the Authorization header when accessing Polaris: - - ```shell - curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" - ``` diff --git a/docker/in-memory/docker-compose.yml b/docker/in-memory/docker-compose.yml deleted file mode 100644 index 5b9839da62..0000000000 --- a/docker/in-memory/docker-compose.yml +++ /dev/null @@ -1,38 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -services: - polaris: - image: apache/polaris:latest - ports: - # API port - - "8181:8181" - # Management port (metrics and health checks) - - "8182:8182" - environment: - POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t - polaris.persistence.type: in-memory - polaris.realm-context.realms: POLARIS - quarkus.otel.sdk.disabled: "true" - healthcheck: - test: ["CMD", "curl", "http://localhost:8182/q/health"] - interval: 2s - timeout: 10s - retries: 5 - diff --git a/docker/telemetry/README.md b/docker/telemetry/README.md deleted file mode 100644 index 85cb094909..0000000000 --- a/docker/telemetry/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Polaris with Prometheus and Jaeger - -1. Build the Polaris image if it's not already present locally: - - ```shell - ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true - ``` - -2. Start the docker compose group using either podman or docker: - - ```shell - podman compose -f docker/telemetry/docker-compose.yml up - ``` - ```shell - docker compose -f docker/telemetry/docker-compose.yml up - ``` - -3. To access Polaris from the host machine, first request an access token: - - ```shell - export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ - --resolve polaris:8181:127.0.0.1 \ - --user root:s3cr3t \ - -d 'grant_type=client_credentials' \ - -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) - ``` - -4. Then, use the access token in the Authorization header when accessing Polaris: - - ```shell - curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" - curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" - ``` - -5. Access the following services: - - - Prometheus UI: browse to http://localhost:9093 to view metrics. - - Jaeger UI: browse to http://localhost:16686 to view traces. diff --git a/getting-started/README.md b/getting-started/README.md new file mode 100644 index 0000000000..e36e306465 --- /dev/null +++ b/getting-started/README.md @@ -0,0 +1,44 @@ + + +# Getting Started with Apache Polaris + +You can quickly get started with Polaris by playing with the docker-compose examples provided in +this directory. Each example has detailed instructions. 
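+
+As a minimal sketch of the common pattern (using the `spark` example; run the command from the
+repository root, then follow that example's README for the remaining steps):
+
+```shell
+docker-compose -f getting-started/spark/docker-compose.yml up
+```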
+
+## Prerequisites
+
+- [Docker](https://docs.docker.com/get-docker/)
+- [Docker Compose](https://docs.docker.com/compose/install/)
+- [jq](https://stedolan.github.io/jq/download/) (for some examples)
+
+## Getting Started Examples
+
+- [Spark](spark): An example that uses an in-memory metastore, automatically bootstrapped, with
+  Apache Spark and a Jupyter notebook.
+
+- [Trino](trino): An example that uses Trino with Polaris.
+
+- [Telemetry](telemetry): An example that includes Prometheus and Jaeger to collect metrics and
+  traces from Apache Polaris. This example automatically creates a `polaris_demo` catalog.
+
+- [EclipseLink](elipselink): An example that uses an EclipseLink metastore and a Postgres
+  database. The realm is bootstrapped with the Polaris Admin tool. This example also creates a
+  `polaris_demo` catalog, and offers the ability to run Spark SQL queries. Finally, it shows how to
+  attach a debugger to the Polaris server.
diff --git a/docker/assets/eclipselink/persistence.xml b/getting-started/assets/eclipselink/persistence.xml
similarity index 100%
rename from docker/assets/eclipselink/persistence.xml
rename to getting-started/assets/eclipselink/persistence.xml
diff --git a/docker/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh
similarity index 100%
rename from docker/assets/polaris/create-catalog.sh
rename to getting-started/assets/polaris/create-catalog.sh
diff --git a/docker/assets/prometheus/prometheus.yml b/getting-started/assets/prometheus/prometheus.yml
similarity index 100%
rename from docker/assets/prometheus/prometheus.yml
rename to getting-started/assets/prometheus/prometheus.yml
diff --git a/getting-started/elipselink/README.md b/getting-started/elipselink/README.md
new file mode 100644
index 0000000000..793b7e9623
--- /dev/null
+++ b/getting-started/elipselink/README.md
@@ -0,0 +1,71 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Getting Started with Apache Polaris, EclipseLink, Postgres and Spark SQL
+
+This example requires `jq` to be installed on your machine.
+
+1. If the image is not already present locally, build the Polaris image with support for
+   EclipseLink and the Postgres JDBC driver:
+
+   ```shell
+   ./gradlew :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
+     -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \
+     -Dquarkus.container-image.tag=postgres-latest \
+     -Dquarkus.container-image.build=true
+   ```
+
+2. Start the docker compose group by running the following command from the root of the repository:
+
+   ```shell
+   docker compose -f getting-started/elipselink/docker-compose.yml up
+   ```
+
+3. To run Spark SQL queries, attach to the running `spark-sql` container:
+
+   ```shell
+   docker attach $(docker ps -q --filter name=spark-sql)
+   ```
+
+   You may not see Spark's prompt immediately; press ENTER to make it appear. A few commands that you can try:
+
+   ```sql
+   CREATE NAMESPACE polaris.ns1;
+   USE polaris.ns1;
+   CREATE TABLE table1 (id int, name string);
+   INSERT INTO table1 VALUES (1, 'a');
+   SELECT * FROM table1;
+   ```
+
+4. To access Polaris from the host machine, first request an access token:
+
+   ```shell
+   export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
+      --resolve polaris:8181:127.0.0.1 \
+      --user root:s3cr3t \
+      -d 'grant_type=client_credentials' \
+      -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token)
+   ```
+
+5. Then, use the access token in the Authorization header when accessing Polaris:
+
+   ```shell
+   curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN"
+   curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN"
+   ```
diff --git a/docker/elipselink/docker-compose.yml b/getting-started/elipselink/docker-compose.yml
similarity index 100%
rename from docker/elipselink/docker-compose.yml
rename to getting-started/elipselink/docker-compose.yml
diff --git a/getting-started/spark/README.md b/getting-started/spark/README.md
index 55e4f9d941..793fbf0528 100644
--- a/getting-started/spark/README.md
+++ b/getting-started/spark/README.md
@@ -22,10 +22,19 @@ This getting started guide provides a `docker-compose` file to set up
 [Apache Spark](https://spark.apache.org/) with Apache Polaris. Apache Polaris is configured as an
 Iceberg REST Catalog in Spark. A Jupyter notebook is used to run PySpark.
 
+## Build the Polaris image
+
+If a Polaris image is not already present locally, build one with the following command:
+
+```shell
+./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true
+```
+
 ## Run the `docker-compose` file
+
 To start the `docker-compose` file, run this command from the repo's root directory:
-```
-docker-compose -f getting-started/spark/docker-compose.yml up
+```shell
+docker-compose -f getting-started/spark/docker-compose.yml up
 ```
 
 This will spin up 2 container services
@@ -38,8 +47,5 @@ In the Jupyter notebook container log, look for the URL to access the Jupyter notebook.
 Open the Jupyter notebook in a browser.
 Navigate to [`notebooks/SparkPolaris.ipynb`](http://127.0.0.1:8888/lab/tree/notebooks/SparkPolaris.ipynb)
 
-## Change the Polaris credential
-The Polaris service will create a new root crendential on startup, find this credential in the Polaris service log and change the `polaris_credential` variable in the first cell of the jupyter notebook
-
 ## Run the Jupyter notebook
 You can now run all cells in the notebook or write your own code!
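+
+## Access Polaris from the host machine
+
+As a minimal sketch (assuming the `root,s3cr3t` bootstrap credentials configured in the compose
+file, and `jq` installed on the host), you can request an access token and call the Polaris API
+directly:
+
+```shell
+# Request an access token using the bootstrap credentials
+export POLARIS_TOKEN=$(curl -s http://localhost:8181/api/catalog/v1/oauth/tokens \
+   --user root:s3cr3t \
+   -d 'grant_type=client_credentials' \
+   -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token)
+
+# The management API should now list the catalogs visible to the root principal
+curl -s http://localhost:8181/api/management/v1/catalogs \
+   -H "Authorization: Bearer $POLARIS_TOKEN"
+```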
diff --git a/getting-started/spark/docker-compose.yml b/getting-started/spark/docker-compose.yml index 3c6468c825..a6d51f3a99 100644 --- a/getting-started/spark/docker-compose.yml +++ b/getting-started/spark/docker-compose.yml @@ -19,9 +19,7 @@ services: polaris: - build: - context: ../../ - network: host + image: apache/polaris:latest ports: - "8181:8181" - "8182" @@ -29,7 +27,9 @@ services: AWS_REGION: us-west-2 AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY - + POLARIS_BOOTSTRAP_CREDENTIALS: default-realm,root,s3cr3t + polaris.realm-context.realms: default-realm + quarkus.otel.sdk.disabled: "true" healthcheck: test: ["CMD", "curl", "http://localhost:8182/healthcheck"] interval: 10s diff --git a/getting-started/spark/notebooks/SparkPolaris.ipynb b/getting-started/spark/notebooks/SparkPolaris.ipynb index 69d0bb3f26..ec4b0f649d 100644 --- a/getting-started/spark/notebooks/SparkPolaris.ipynb +++ b/getting-started/spark/notebooks/SparkPolaris.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "f982815a-2b48-46ab-96a6-20dad7ec1420", "metadata": {}, "outputs": [], @@ -21,9 +21,7 @@ "from polaris.catalog.api_client import ApiClient as CatalogApiClient\n", "from polaris.catalog.api_client import Configuration as CatalogApiClientConfiguration\n", "\n", - "# (CHANGE ME): This credential changes on every Polaris service restart\n", - "# In the Polaris log, look for the `realm: default-realm root principal credentials:` string\n", - "polaris_credential = '35df9f8a34199df0:101b9d35700032416210ad2d39b1b4e3' # pragma: allowlist secret\n", + "polaris_credential = 'root:s3cr3t' # pragma: allowlist secret\n", "\n", "client_id, client_secret = polaris_credential.split(\":\")\n", "client = CatalogApiClient(CatalogApiClientConfiguration(username=client_id,\n", @@ -50,10 +48,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0f7a311a-9a55-4ff7-a40e-db3c74c53b9b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1737806925560, last_update_timestamp=1737806925560, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from polaris.management import *\n", "\n", @@ -81,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "e3e42c12-4e01-4577-bdf5-90c2704a5de8", "metadata": {}, "outputs": [], @@ -133,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c5ceb5ca-f977-46c7-b2a6-07dda59e8a8b", "metadata": {}, "outputs": [], @@ -177,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "b51a6433-99c9-46c5-a855-928e30bad6e5", "metadata": {}, "outputs": [], @@ -247,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "fd13f24b-9d59-470d-9be1-660c22dde680", "metadata": { "tags": [] @@ -296,12 +305,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "72e9e5fb-b22e-4d38-bb1e-4ca78c0d0f3e", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "+---------+\n", + "|namespace|\n", + "+---------+\n", + "+---------+\n", + "\n" + ] + } + ], "source": [ "spark.sql(\"USE polaris\")\n", "spark.sql(\"SHOW NAMESPACES\").show()" @@ -317,12 +338,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "54159ab2-5964-49a0-8202-a4b64ee4f9e7", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------+\n", + "| namespace|\n", + "+-------------------+\n", + "|COLLADO_TEST.PUBLIC|\n", + "+-------------------+\n", + "\n" + ] + } + ], "source": [ "spark.sql(\"CREATE NAMESPACE IF NOT EXISTS COLLADO_TEST\")\n", "spark.sql(\"CREATE NAMESPACE IF NOT EXISTS COLLADO_TEST.PUBLIC\")\n", @@ -339,12 +373,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4abc8426-7f2a-4f3f-9e26-1f1824f870c6", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DataFrame[]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "spark.sql(\"USE NAMESPACE COLLADO_TEST.PUBLIC\")\n", "spark.sql(\"\"\"CREATE TABLE IF NOT EXISTS TEST_TABLE (\n", @@ -364,12 +409,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "ff5a466d-6a67-4f42-a6a6-ac54ec258e54", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+----+\n", + "| id|data|\n", + "+---+----+\n", + "+---+----+\n", + "\n" + ] + } + ], "source": [ "spark.sql(\"SELECT * FROM TEST_TABLE\").show()" ] @@ -385,12 +442,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d7ab2991-6de9-4105-9f95-4c9f1c18f426", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+-------------+\n", + "| id| data|\n", + "+---+-------------+\n", + "| 1| some data|\n", + "| 2| more data|\n", + "| 3|yet more data|\n", + "+---+-------------+\n", + "\n" + ] + } + ], "source": [ "spark.sql(\"INSERT INTO TEST_TABLE VALUES (1, 'some data'), (2, 'more data'), (3, 'yet more data')\")\n", "spark.sql(\"SELECT * FROM TEST_TABLE\").show()" @@ -407,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "75131c37-2ad2-4e6d-bd65-64a915bb6694", "metadata": {}, "outputs": [], @@ -441,10 +513,121 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "cf24575d-4bfd-456c-962c-82f80fda5cc0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/json": { + "metadata": { + "current-schema-id": 0, + "current-snapshot-id": 7731655228716575000, + "default-sort-order-id": 0, + "default-spec-id": 0, + "format-version": 2, + "last-column-id": 2, + "last-partition-id": 999, + "last-sequence-number": 1, + "last-updated-ms": 1737806988261, + "location": "file:///tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE", + "metadata-log": [ + { + "metadata-file": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/00000-3ca446e8-576f-4dd6-b755-5870a41994c3.metadata.json", + "timestamp-ms": 1737806977962 + } + ], + "partition-specs": [ + { + "fields": [] + } + ], + "properties": { + "created-at": "2025-01-25T12:09:37.942122132Z", + "owner": "jovyan", + "write.parquet.compression-codec": "zstd" + }, + "refs": { + "main": { + "snapshot-id": 7731655228716575000, + "type": "branch" + } + }, + "schemas": [ + { + 
"fields": [ + { + "doc": "unique id", + "id": 1, + "name": "id", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "data", + "required": false, + "type": "string" + } + ], + "type": "struct" + } + ], + "snapshot-log": [ + { + "snapshot-id": 7731655228716575000, + "timestamp-ms": 1737806988261 + } + ], + "snapshots": [ + { + "manifest-list": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/snap-7731655228716575024-1-1ec6c15b-54c4-4bc5-a950-07852d066caf.avro", + "schema-id": 0, + "sequence-number": 1, + "snapshot-id": 7731655228716575000, + "summary": { + "added-data-files": "3", + "added-files-size": "2131", + "added-records": "3", + "app-id": "local-1737806967496", + "changed-partition-count": "1", + "engine-name": "spark", + "engine-version": "3.5.0", + "iceberg-version": "Apache Iceberg 1.7.1 (commit 4a432839233f2343a9eae8255532f911f06358ef)", + "operation": "append", + "spark.app.id": "local-1737806967496", + "total-data-files": "3", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "2131", + "total-position-deletes": "0", + "total-records": "3" + }, + "timestamp-ms": 1737806988261 + } + ], + "sort-orders": [ + { + "fields": [] + } + ], + "table-uuid": "970e002a-9611-4649-880a-7bf1e7905284" + }, + "metadata-location": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/00001-16b7ebd0-8166-4d43-a13b-a4b42db01e17.metadata.json" + }, + "text/plain": [ + "" + ] + }, + "metadata": { + "application/json": { + "expanded": true, + "root": "root" + } + }, + "output_type": "display_data" + } + ], "source": [ "import codecs\n", "import json\n", @@ -469,10 +652,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "6f3aac79-bf45-4603-bd64-30eeab4bdfa7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DataFrame[]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The new spark session inherits everything from the previous session except for the overridden credentials\n", "new_spark = spark.newSession()\n", @@ -491,10 +685,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "d517424d-8893-4375-ac3b-c532c8682b6a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------+----------+-----------+\n", + "| namespace| tableName|isTemporary|\n", + "+-------------------+----------+-----------+\n", + "|COLLADO_TEST.PUBLIC|TEST_TABLE| false|\n", + "+-------------------+----------+-----------+\n", + "\n" + ] + } + ], "source": [ "new_spark.sql(\"USE NAMESPACE COLLADO_TEST.PUBLIC\")\n", "new_spark.sql(\"SHOW TABLES\").show()" @@ -510,10 +717,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "7fce4b1f-4d71-4d03-8b60-3e9ca6ca6ddf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+-------------+\n", + "| id| data|\n", + "+---+-------------+\n", + "| 1| some data|\n", + "| 2| more data|\n", + "| 3|yet more data|\n", + "+---+-------------+\n", + "\n" + ] + } + ], "source": [ "new_spark.sql(\"SELECT * FROM TEST_TABLE\").show()" ] @@ -528,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "27434b46-1d40-4bd5-b247-66ec069db265", "metadata": { "editable": true, @@ -539,7 +761,23 @@ "raises-exception" ] }, - "outputs": [], + "outputs": [ + { + "ename": 
"Py4JJavaError", + "evalue": "An error occurred while calling o90.sql.\n: org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'mlee' with activated PrincipalRoles '[]' and activated grants via '[product_manager, read_only]' is not authorized for op UPDATE_TABLE\n\tat org.apache.iceberg.rest.ErrorHandlers$DefaultErrorHandler.accept(ErrorHandlers.java:212)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:97)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:80)\n\tat org.apache.iceberg.rest.HTTPClient.throwFailure(HTTPClient.java:211)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:323)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:262)\n\tat org.apache.iceberg.rest.HTTPClient.post(HTTPClient.java:368)\n\tat org.apache.iceberg.rest.RESTClient.post(RESTClient.java:112)\n\tat org.apache.iceberg.rest.RESTTableOperations.commit(RESTTableOperations.java:159)\n\tat org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:429)\n\tat org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:413)\n\tat org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:219)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:203)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196)\n\tat org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:401)\n\tat org.apache.iceberg.spark.source.SparkWrite.commitOperation(SparkWrite.java:233)\n\tat org.apache.iceberg.spark.source.SparkWrite$BatchAppend.commit(SparkWrite.java:301)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:399)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)\n\tat 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)\n\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:220)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mnew_spark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mINSERT INTO TEST_TABLE VALUES (4, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43myou cannot see this data\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m), (5, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mit will never be inserted\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m), (6, 
\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msad emoji\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", + "File \u001b[0;32m/usr/local/spark/python/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, 
**kw)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", + "File \u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. 
Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o90.sql.\n: org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'mlee' with activated PrincipalRoles '[]' and activated grants via '[product_manager, read_only]' is not authorized for op UPDATE_TABLE\n\tat org.apache.iceberg.rest.ErrorHandlers$DefaultErrorHandler.accept(ErrorHandlers.java:212)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:97)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:80)\n\tat org.apache.iceberg.rest.HTTPClient.throwFailure(HTTPClient.java:211)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:323)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:262)\n\tat org.apache.iceberg.rest.HTTPClient.post(HTTPClient.java:368)\n\tat org.apache.iceberg.rest.RESTClient.post(RESTClient.java:112)\n\tat org.apache.iceberg.rest.RESTTableOperations.commit(RESTTableOperations.java:159)\n\tat org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:429)\n\tat org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:413)\n\tat org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:219)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:203)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196)\n\tat org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:401)\n\tat org.apache.iceberg.spark.source.SparkWrite.commitOperation(SparkWrite.java:233)\n\tat org.apache.iceberg.spark.source.SparkWrite$BatchAppend.commit(SparkWrite.java:301)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:399)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)\n\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:220)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n" + ] + } + ], "source": [ "new_spark.sql(\"INSERT INTO TEST_TABLE VALUES (4, 'you cannot see this data'), (5, 'it will never be inserted'), (6, 'sad emoji')\")" ] diff --git a/getting-started/telemetry/README.md b/getting-started/telemetry/README.md new file mode 100644 index 0000000000..a956d60664 --- /dev/null +++ b/getting-started/telemetry/README.md @@ -0,0 +1,61 @@ + + +# Getting Started with Apache Polaris, Prometheus and Jaeger + +This example requires `jq` to be 
installed on your machine.
+
+1. Build the Polaris image if it's not already present locally:
+
+   ```shell
+   ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true
+   ```
+
+2. Start the Docker Compose services by running the following command from the root of the repository:
+
+   ```shell
+   docker compose -f getting-started/telemetry/docker-compose.yml up
+   ```
+
+3. To access Polaris from the host machine, first request an access token:
+
+   ```shell
+   export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
+     --resolve polaris:8181:127.0.0.1 \
+     --user root:s3cr3t \
+     -d 'grant_type=client_credentials' \
+     -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token)
+   ```
+
+4. Then, use the access token in the `Authorization` header when accessing Polaris. You can also set
+   the `Polaris-Request-Id` header; it should appear in all logs and traces:
+
+   ```shell
+   curl -v http://127.0.0.1:8181/api/management/v1/principal-roles \
+     -H "Authorization: Bearer $POLARIS_TOKEN" \
+     -H "Polaris-Request-Id: 1234"
+   curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo \
+     -H "Authorization: Bearer $POLARIS_TOKEN" \
+     -H "Polaris-Request-Id: 5678"
+   ```
+
+5. Access the following services:
+
+   - Prometheus UI: browse to http://localhost:9093 to view metrics.
+   - Jaeger UI: browse to http://localhost:16686 to view traces.
diff --git a/docker/telemetry/docker-compose.yml b/getting-started/telemetry/docker-compose.yml
similarity index 76%
rename from docker/telemetry/docker-compose.yml
rename to getting-started/telemetry/docker-compose.yml
index 17cef58464..a9272e6cfd 100644
--- a/docker/telemetry/docker-compose.yml
+++ b/getting-started/telemetry/docker-compose.yml
@@ -32,6 +32,17 @@ services:
     environment:
       POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t
       quarkus.otel.exporter.otlp.endpoint: http://jaeger:4317
+      # Resource attributes (added to all traces)
+      quarkus.otel.resource.attributes[0]: polaris.app=polaris-getting-started
+      quarkus.otel.resource.attributes[1]: polaris.env=dev
+      # Custom metrics (added to all requests)
+      polaris.metrics.tags.app: polaris-getting-started
+      polaris.metrics.tags.env: dev
+      # Custom logging (added to all log messages)
+      polaris.log.mdc.app: polaris-getting-started
+      polaris.log.mdc.env: dev
+      quarkus.log.console.format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{app},%X{env}] [%X{realmId},%X{requestId}] [%X{traceId},%X{spanId}] (%t) %s%e%n"
+      quarkus.log.file.format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{app},%X{env}] [%X{realmId},%X{requestId}] [%X{traceId},%X{spanId}] (%t) %s%e%n"
       polaris.realm-context.realms: POLARIS
     healthcheck:
       test: ["CMD", "curl", "http://localhost:8182/q/health"]

From 0f90526a5b2429eb0368178ac09aee0ae937bf32 Mon Sep 17 00:00:00 2001
From: Alexandre Dutra
Date: Sat, 25 Jan 2025 20:19:52 +0100
Subject: [PATCH 6/7] review

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index abcb748f88..d9a4390c15 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ select * from db1.table1;
 - `docker run -p 8181:8181 -p 8182:8182 apache/polaris:latest` - To run the image.

 The Polaris codebase contains some docker compose examples to quickly get started with Polaris,
-using different configurations. Check the `./docker` directory for more information.
+using different configurations. Check the `./getting-started` directory for more information.
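As a quick smoke test for the telemetry example above, something like the following can confirm that metrics and request IDs are flowing. This is a sketch, assuming the compose service is named `polaris`, that Prometheus is published on host port 9093, and that the request ID `1234` was sent as in step 4 of the README; the Prometheus job label depends on the scrape configuration shipped with the example.

```shell
# Query Prometheus' HTTP API for the built-in `up` metric (value "1" means the
# scrape target is reachable); port 9093 matches the Prometheus UI port above.
curl -s 'http://localhost:9093/api/v1/query?query=up' \
  | jq '.data.result[] | {job: .metric.job, up: .value[1]}'

# Look for the request ID sent in step 4; the quarkus.log.console.format above
# includes %X{requestId}, so "1234" should appear in the Polaris log lines
# (assumes the compose service is named `polaris`, as in the README steps).
docker compose -f getting-started/telemetry/docker-compose.yml logs polaris | grep '1234'
```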
#### Running in Kubernetes From ff99e5d0e888ab5280dc499fa42a8c1ff2579472 Mon Sep 17 00:00:00 2001 From: Alexandre Dutra Date: Mon, 27 Jan 2025 09:36:21 +0100 Subject: [PATCH 7/7] revert spurious changes --- .../spark/notebooks/SparkPolaris.ipynb | 296 ++---------------- 1 file changed, 28 insertions(+), 268 deletions(-) diff --git a/getting-started/spark/notebooks/SparkPolaris.ipynb b/getting-started/spark/notebooks/SparkPolaris.ipynb index ec4b0f649d..adb2f1a2ce 100644 --- a/getting-started/spark/notebooks/SparkPolaris.ipynb +++ b/getting-started/spark/notebooks/SparkPolaris.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "f982815a-2b48-46ab-96a6-20dad7ec1420", "metadata": {}, "outputs": [], @@ -48,21 +48,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0f7a311a-9a55-4ff7-a40e-db3c74c53b9b", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "PolarisCatalog(type='INTERNAL', name='polaris_demo', properties=CatalogProperties(default_base_location='file:///tmp/polaris/', additional_properties={}), create_timestamp=1737806925560, last_update_timestamp=1737806925560, entity_version=1, storage_config_info=FileStorageConfigInfo(storage_type='FILE', allowed_locations=['file:///tmp', 'file:///tmp/polaris/']))" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from polaris.management import *\n", "\n", @@ -90,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "e3e42c12-4e01-4577-bdf5-90c2704a5de8", "metadata": {}, "outputs": [], @@ -142,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "c5ceb5ca-f977-46c7-b2a6-07dda59e8a8b", "metadata": {}, "outputs": [], @@ -186,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b51a6433-99c9-46c5-a855-928e30bad6e5", "metadata": {}, "outputs": [], @@ -256,7 +245,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "fd13f24b-9d59-470d-9be1-660c22dde680", "metadata": { "tags": [] @@ -305,24 +294,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "72e9e5fb-b22e-4d38-bb1e-4ca78c0d0f3e", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------+\n", - "|namespace|\n", - "+---------+\n", - "+---------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "spark.sql(\"USE polaris\")\n", "spark.sql(\"SHOW NAMESPACES\").show()" @@ -338,25 +315,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "54159ab2-5964-49a0-8202-a4b64ee4f9e7", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------------------+\n", - "| namespace|\n", - "+-------------------+\n", - "|COLLADO_TEST.PUBLIC|\n", - "+-------------------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "spark.sql(\"CREATE NAMESPACE IF NOT EXISTS COLLADO_TEST\")\n", "spark.sql(\"CREATE NAMESPACE IF NOT EXISTS COLLADO_TEST.PUBLIC\")\n", @@ -373,23 +337,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "4abc8426-7f2a-4f3f-9e26-1f1824f870c6", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "DataFrame[]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + 
"outputs": [], "source": [ "spark.sql(\"USE NAMESPACE COLLADO_TEST.PUBLIC\")\n", "spark.sql(\"\"\"CREATE TABLE IF NOT EXISTS TEST_TABLE (\n", @@ -409,24 +362,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "ff5a466d-6a67-4f42-a6a6-ac54ec258e54", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+----+\n", - "| id|data|\n", - "+---+----+\n", - "+---+----+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "spark.sql(\"SELECT * FROM TEST_TABLE\").show()" ] @@ -442,27 +383,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "d7ab2991-6de9-4105-9f95-4c9f1c18f426", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+-------------+\n", - "| id| data|\n", - "+---+-------------+\n", - "| 1| some data|\n", - "| 2| more data|\n", - "| 3|yet more data|\n", - "+---+-------------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "spark.sql(\"INSERT INTO TEST_TABLE VALUES (1, 'some data'), (2, 'more data'), (3, 'yet more data')\")\n", "spark.sql(\"SELECT * FROM TEST_TABLE\").show()" @@ -479,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "75131c37-2ad2-4e6d-bd65-64a915bb6694", "metadata": {}, "outputs": [], @@ -513,121 +439,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "cf24575d-4bfd-456c-962c-82f80fda5cc0", "metadata": {}, - "outputs": [ - { - "data": { - "application/json": { - "metadata": { - "current-schema-id": 0, - "current-snapshot-id": 7731655228716575000, - "default-sort-order-id": 0, - "default-spec-id": 0, - "format-version": 2, - "last-column-id": 2, - "last-partition-id": 999, - "last-sequence-number": 1, - "last-updated-ms": 1737806988261, - "location": "file:///tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE", - "metadata-log": [ - { - "metadata-file": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/00000-3ca446e8-576f-4dd6-b755-5870a41994c3.metadata.json", - "timestamp-ms": 1737806977962 - } - ], - "partition-specs": [ - { - "fields": [] - } - ], - "properties": { - "created-at": "2025-01-25T12:09:37.942122132Z", - "owner": "jovyan", - "write.parquet.compression-codec": "zstd" - }, - "refs": { - "main": { - "snapshot-id": 7731655228716575000, - "type": "branch" - } - }, - "schemas": [ - { - "fields": [ - { - "doc": "unique id", - "id": 1, - "name": "id", - "required": true, - "type": "long" - }, - { - "id": 2, - "name": "data", - "required": false, - "type": "string" - } - ], - "type": "struct" - } - ], - "snapshot-log": [ - { - "snapshot-id": 7731655228716575000, - "timestamp-ms": 1737806988261 - } - ], - "snapshots": [ - { - "manifest-list": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/snap-7731655228716575024-1-1ec6c15b-54c4-4bc5-a950-07852d066caf.avro", - "schema-id": 0, - "sequence-number": 1, - "snapshot-id": 7731655228716575000, - "summary": { - "added-data-files": "3", - "added-files-size": "2131", - "added-records": "3", - "app-id": "local-1737806967496", - "changed-partition-count": "1", - "engine-name": "spark", - "engine-version": "3.5.0", - "iceberg-version": "Apache Iceberg 1.7.1 (commit 4a432839233f2343a9eae8255532f911f06358ef)", - "operation": "append", - "spark.app.id": "local-1737806967496", - "total-data-files": "3", - "total-delete-files": "0", - "total-equality-deletes": "0", - "total-files-size": "2131", - "total-position-deletes": "0", - 
"total-records": "3" - }, - "timestamp-ms": 1737806988261 - } - ], - "sort-orders": [ - { - "fields": [] - } - ], - "table-uuid": "970e002a-9611-4649-880a-7bf1e7905284" - }, - "metadata-location": "file:/tmp/polaris/COLLADO_TEST/PUBLIC/TEST_TABLE/metadata/00001-16b7ebd0-8166-4d43-a13b-a4b42db01e17.metadata.json" - }, - "text/plain": [ - "" - ] - }, - "metadata": { - "application/json": { - "expanded": true, - "root": "root" - } - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import codecs\n", "import json\n", @@ -652,21 +467,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "6f3aac79-bf45-4603-bd64-30eeab4bdfa7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DataFrame[]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# The new spark session inherits everything from the previous session except for the overridden credentials\n", "new_spark = spark.newSession()\n", @@ -685,23 +489,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "d517424d-8893-4375-ac3b-c532c8682b6a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------------------+----------+-----------+\n", - "| namespace| tableName|isTemporary|\n", - "+-------------------+----------+-----------+\n", - "|COLLADO_TEST.PUBLIC|TEST_TABLE| false|\n", - "+-------------------+----------+-----------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "new_spark.sql(\"USE NAMESPACE COLLADO_TEST.PUBLIC\")\n", "new_spark.sql(\"SHOW TABLES\").show()" @@ -717,25 +508,10 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "7fce4b1f-4d71-4d03-8b60-3e9ca6ca6ddf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+-------------+\n", - "| id| data|\n", - "+---+-------------+\n", - "| 1| some data|\n", - "| 2| more data|\n", - "| 3|yet more data|\n", - "+---+-------------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "new_spark.sql(\"SELECT * FROM TEST_TABLE\").show()" ] @@ -750,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "27434b46-1d40-4bd5-b247-66ec069db265", "metadata": { "editable": true, @@ -761,23 +537,7 @@ "raises-exception" ] }, - "outputs": [ - { - "ename": "Py4JJavaError", - "evalue": "An error occurred while calling o90.sql.\n: org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'mlee' with activated PrincipalRoles '[]' and activated grants via '[product_manager, read_only]' is not authorized for op UPDATE_TABLE\n\tat org.apache.iceberg.rest.ErrorHandlers$DefaultErrorHandler.accept(ErrorHandlers.java:212)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:97)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:80)\n\tat org.apache.iceberg.rest.HTTPClient.throwFailure(HTTPClient.java:211)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:323)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:262)\n\tat org.apache.iceberg.rest.HTTPClient.post(HTTPClient.java:368)\n\tat org.apache.iceberg.rest.RESTClient.post(RESTClient.java:112)\n\tat org.apache.iceberg.rest.RESTTableOperations.commit(RESTTableOperations.java:159)\n\tat org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:429)\n\tat 
org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:413)\n\tat org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:219)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:203)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196)\n\tat org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:401)\n\tat org.apache.iceberg.spark.source.SparkWrite.commitOperation(SparkWrite.java:233)\n\tat org.apache.iceberg.spark.source.SparkWrite$BatchAppend.commit(SparkWrite.java:301)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:399)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)\n\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)\n\tat 
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:220)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mnew_spark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mINSERT INTO TEST_TABLE VALUES (4, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43myou cannot see this data\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m), (5, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mit will never be inserted\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m), (6, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43msad emoji\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/usr/local/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", - "File \u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", - "File \u001b[0;32m/usr/local/spark/python/pyspark/errors/exceptions/captured.py:179\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m Py4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", - "File \u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, 
name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", - "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o90.sql.\n: org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'mlee' with activated PrincipalRoles '[]' and activated grants via '[product_manager, read_only]' is not authorized for op UPDATE_TABLE\n\tat org.apache.iceberg.rest.ErrorHandlers$DefaultErrorHandler.accept(ErrorHandlers.java:212)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:97)\n\tat org.apache.iceberg.rest.ErrorHandlers$CommitErrorHandler.accept(ErrorHandlers.java:80)\n\tat org.apache.iceberg.rest.HTTPClient.throwFailure(HTTPClient.java:211)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:323)\n\tat org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:262)\n\tat org.apache.iceberg.rest.HTTPClient.post(HTTPClient.java:368)\n\tat org.apache.iceberg.rest.RESTClient.post(RESTClient.java:112)\n\tat org.apache.iceberg.rest.RESTTableOperations.commit(RESTTableOperations.java:159)\n\tat org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:429)\n\tat org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:413)\n\tat org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:219)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:203)\n\tat org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196)\n\tat org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:401)\n\tat org.apache.iceberg.spark.source.SparkWrite.commitOperation(SparkWrite.java:233)\n\tat org.apache.iceberg.spark.source.SparkWrite$BatchAppend.commit(SparkWrite.java:301)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:399)\n\tat org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:359)\n\tat 
org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:337)\n\tat org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:336)\n\tat org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:225)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)\n\tat org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)\n\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)\n\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)\n\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)\n\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)\n\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)\n\tat org.apache.spark.sql.Dataset.(Dataset.scala:220)\n\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)\n\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:638)\n\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:629)\n\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:659)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)\n\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n" - ] - } - ], + "outputs": [], "source": [ "new_spark.sql(\"INSERT INTO TEST_TABLE VALUES (4, 'you cannot see this data'), (5, 'it will never be inserted'), (6, 'sad emoji')\")" ]