diff --git a/README.md b/README.md index 1e1e9ed54a..d9a4390c15 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,9 @@ select * from db1.table1; build the image locally. - `docker run -p 8181:8181 -p 8182:8182 apache/polaris:latest` - To run the image. +The Polaris codebase contains some docker compose examples to quickly get started with Polaris, +using different configurations. Check the `./getting-started` directory for more information. + #### Running in Kubernetes - `./run.sh` - To run Polaris as a mini-deployment locally. This will create a Kind cluster, diff --git a/getting-started/README.md b/getting-started/README.md new file mode 100644 index 0000000000..e36e306465 --- /dev/null +++ b/getting-started/README.md @@ -0,0 +1,44 @@ + + +# Getting Started with Apache Polaris + +You can quickly get started with Polaris by playing with the docker-compose examples provided in +this directory. Each example has detailed instructions. + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) +- [Docker Compose](https://docs.docker.com/compose/install/) +- [jq](https://stedolan.github.io/jq/download/) (for some examples) + +## Getting Started Examples + +- [Spark](spark): An example that uses an in-memory metastore, automatically bootstrapped, with + Apache Spark and a Jupyter notebook. + +- [Trino](trino): An example that uses Trino with Polaris. + +- [Telemetry](telemetry): An example that includes Prometheus and Jaeger to collect metrics and + traces from Apache Polaris. This example automatically creates a `polaris_demo` catalog. + +- [Eclipselink](elipselink): An example that uses an Eclipselink metastore and a Postgres + database. The realm is bootstrapped with the Polaris Admin tool. This example also creates a + `polaris_demo` catalog, and offers the ability to run Spark SQL queries. Finally, it shows how to + attach a debugger to the Polaris server. 
diff --git a/getting-started/assets/eclipselink/persistence.xml b/getting-started/assets/eclipselink/persistence.xml new file mode 100644 index 0000000000..38c3676ded --- /dev/null +++ b/getting-started/assets/eclipselink/persistence.xml @@ -0,0 +1,43 @@ + + + + + + org.eclipse.persistence.jpa.PersistenceProvider + org.apache.polaris.jpa.models.ModelEntity + org.apache.polaris.jpa.models.ModelEntityActive + org.apache.polaris.jpa.models.ModelEntityChangeTracking + org.apache.polaris.jpa.models.ModelEntityDropped + org.apache.polaris.jpa.models.ModelGrantRecord + org.apache.polaris.jpa.models.ModelPrincipalSecrets + org.apache.polaris.jpa.models.ModelSequenceId + NONE + + + + + + + + + \ No newline at end of file diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh new file mode 100755 index 0000000000..f069c66376 --- /dev/null +++ b/getting-started/assets/polaris/create-catalog.sh @@ -0,0 +1,60 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +set -e + +token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --user root:s3cr3t \ + -d grant_type=client_credentials \ + -d scope=PRINCIPAL_ROLE:ALL | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') + +if [ -z "${token}" ]; then + echo "Failed to obtain access token." + exit 1 +fi + +echo +echo "Obtained access token: ${token}" + +echo +echo Creating a catalog named polaris_demo... + +curl -s -H "Authorization: Bearer ${token}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://polaris:8181/api/management/v1/catalogs \ + -d '{ + "catalog": { + "name": "polaris_demo", + "type": "INTERNAL", + "readOnly": false, + "properties": { + "default-base-location": "file:///tmp/polaris/" + }, + "storageConfigInfo": { + "storageType": "FILE", + "allowedLocations": [ + "file:///tmp" + ] + } + } + }' + +echo +echo Done. \ No newline at end of file diff --git a/getting-started/assets/prometheus/prometheus.yml b/getting-started/assets/prometheus/prometheus.yml new file mode 100644 index 0000000000..99c779ba73 --- /dev/null +++ b/getting-started/assets/prometheus/prometheus.yml @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +global: + scrape_interval: 15s + evaluation_interval: 15s +scrape_configs: + - job_name: 'polaris' + scrape_interval: 5s + metrics_path: /q/metrics + static_configs: + - targets: ['polaris:8182'] + labels: + service: polaris diff --git a/getting-started/elipselink/README.md b/getting-started/elipselink/README.md new file mode 100644 index 0000000000..793b7e9623 --- /dev/null +++ b/getting-started/elipselink/README.md @@ -0,0 +1,71 @@ + + +# Getting Started with Apache Polaris, EclipseLink, Postgres and Spark SQL + +This example requires `jq` to be installed on your machine. + +1. If such an image is not already present, build the Polaris image with support for EclipseLink and + the Postgres JDBC driver: + + ```shell + ./gradlew :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true + ``` + +2. Start the docker compose group by running the following command from the root of the repository: + + ```shell + docker compose -f getting-started/elipselink/docker-compose.yml up + ``` + +3. Using spark-sql: attach to the running spark-sql container: + + ```shell + docker attach $(docker ps -q --filter name=spark-sql) + ``` + + You may not see Spark's prompt immediately, type ENTER to see it. A few commands that you can try: + + ```sql + CREATE NAMESPACE polaris.ns1; + USE polaris.ns1; + CREATE TABLE table1 (id int, name string); + INSERT INTO table1 VALUES (1, 'a'); + SELECT * FROM table1; + ``` + +4. To access Polaris from the host machine, first request an access token: + + ```shell + export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --resolve polaris:8181:127.0.0.1 \ + --user root:s3cr3t \ + -d 'grant_type=client_credentials' \ + -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) + ``` + +5. 
Then, use the access token in the Authorization header when accessing Polaris: + + ```shell + curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" + curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" + ``` diff --git a/getting-started/elipselink/docker-compose.yml b/getting-started/elipselink/docker-compose.yml new file mode 100644 index 0000000000..d9b5abc2d1 --- /dev/null +++ b/getting-started/elipselink/docker-compose.yml @@ -0,0 +1,121 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +services: + + polaris: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris:postgres-latest + ports: + # API port + - "8181:8181" + # Management port (metrics and health checks) + - "8182:8182" + # Optional, allows attaching a debugger to the Polaris JVM + - "5005:5005" + depends_on: + polaris-bootstrap: + condition: service_completed_successfully + postgres: + condition: service_healthy + environment: + JAVA_DEBUG: "true" + JAVA_DEBUG_PORT: "*:5005" + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + + polaris-bootstrap: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris-admin-tool:postgres-latest + depends_on: + postgres: + condition: service_healthy + environment: + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + command: + - "bootstrap" + - "--realm=POLARIS" + - "--credential=POLARIS,root,s3cr3t" + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/polaris/:/polaris + entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' + + postgres: + image: postgres:17.2 + ports: + - "5432:5432" + # set shared memory limit when using docker-compose + shm_size: 128mb + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: POLARIS + 
POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" + healthcheck: + test: "pg_isready -U postgres" + interval: 5s + timeout: 2s + retries: 15 + + spark-sql: + image: apache/spark:3.5.4-java17-python3 + depends_on: + polaris-setup: + condition: service_completed_successfully + stdin_open: true + tty: true + ports: + - "4040-4045:4040-4045" + healthcheck: + test: "curl localhost:4040" + interval: 5s + retries: 15 + command: [ + /opt/spark/bin/spark-sql, + --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17", + --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", + --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", + --conf, "spark.sql.catalog.polaris.type=rest", + --conf, "spark.sql.catalog.polaris.warehouse=polaris_demo", + --conf, "spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog", + --conf, "spark.sql.catalog.polaris.credential=root:s3cr3t", + --conf, "spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL", + --conf, "spark.sql.defaultCatalog=polaris", + --conf, "spark.sql.catalogImplementation=in-memory", + ] + volumes: + - ~/.ivy2:/home/spark/.ivy2 diff --git a/getting-started/spark/README.md b/getting-started/spark/README.md index 55e4f9d941..793fbf0528 100644 --- a/getting-started/spark/README.md +++ b/getting-started/spark/README.md @@ -22,10 +22,19 @@ This getting started guide provides a `docker-compose` file to set up [Apache Spark](https://spark.apache.org/) with Apache Polaris. Apache Polaris is configured as an Iceberg REST Catalog in Spark. A Jupyter notebook is used to run PySpark. 
+## Build the Polaris image + +If a Polaris image is not already present locally, build one with the following command: + +```shell +./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true +``` + ## Run the `docker-compose` file + To start the `docker-compose` file, run this command from the repo's root directory: -``` -docker-compose -f getting-started/spark/docker-compose.yml up +```shell +docker-compose -f getting-started/spark/docker-compose.yml up ``` This will spin up 2 container services @@ -38,8 +47,5 @@ In the Jupyter notebook container log, look for the URL to access the Jupyter no Open the Jupyter notebook in a browser. Navigate to [`notebooks/SparkPolaris.ipynb`](http://127.0.0.1:8888/lab/tree/notebooks/SparkPolaris.ipynb) -## Change the Polaris credential -The Polaris service will create a new root crendential on startup, find this credential in the Polaris service log and change the `polaris_credential` variable in the first cell of the jupyter notebook - ## Run the Jupyter notebook You can now run all cells in the notebook or write your own code! 
diff --git a/getting-started/spark/docker-compose.yml b/getting-started/spark/docker-compose.yml index 3c6468c825..a6d51f3a99 100644 --- a/getting-started/spark/docker-compose.yml +++ b/getting-started/spark/docker-compose.yml @@ -19,9 +19,7 @@ services: polaris: - build: - context: ../../ - network: host + image: apache/polaris:latest ports: - "8181:8181" - "8182" @@ -29,7 +27,9 @@ services: AWS_REGION: us-west-2 AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY - + POLARIS_BOOTSTRAP_CREDENTIALS: default-realm,root,s3cr3t + polaris.realm-context.realms: default-realm + quarkus.otel.sdk.disabled: "true" healthcheck: test: ["CMD", "curl", "http://localhost:8182/healthcheck"] interval: 10s diff --git a/getting-started/spark/notebooks/SparkPolaris.ipynb b/getting-started/spark/notebooks/SparkPolaris.ipynb index 69d0bb3f26..adb2f1a2ce 100644 --- a/getting-started/spark/notebooks/SparkPolaris.ipynb +++ b/getting-started/spark/notebooks/SparkPolaris.ipynb @@ -21,9 +21,7 @@ "from polaris.catalog.api_client import ApiClient as CatalogApiClient\n", "from polaris.catalog.api_client import Configuration as CatalogApiClientConfiguration\n", "\n", - "# (CHANGE ME): This credential changes on every Polaris service restart\n", - "# In the Polaris log, look for the `realm: default-realm root principal credentials:` string\n", - "polaris_credential = '35df9f8a34199df0:101b9d35700032416210ad2d39b1b4e3' # pragma: allowlist secret\n", + "polaris_credential = 'root:s3cr3t' # pragma: allowlist secret\n", "\n", "client_id, client_secret = polaris_credential.split(\":\")\n", "client = CatalogApiClient(CatalogApiClientConfiguration(username=client_id,\n", diff --git a/getting-started/telemetry/README.md b/getting-started/telemetry/README.md new file mode 100644 index 0000000000..a956d60664 --- /dev/null +++ b/getting-started/telemetry/README.md @@ -0,0 +1,61 @@ + + +# Getting Started with Apache Polaris, Prometheus and Jaeger + +This example requires `jq` 
to be installed on your machine. + +1. Build the Polaris image if it's not already present locally: + + ```shell + ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true + ``` + +2. Start the docker compose group by running the following command from the root of the repository: + + ```shell + docker compose -f getting-started/telemetry/docker-compose.yml up + ``` + +3. To access Polaris from the host machine, first request an access token: + + ```shell + export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --resolve polaris:8181:127.0.0.1 \ + --user root:s3cr3t \ + -d 'grant_type=client_credentials' \ + -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) + ``` + +4. Then, use the access token in the Authorization header when accessing Polaris; you can also test + the `Polaris-Request-Id` header; you should see it in all logs and traces: + + ```shell + curl -v http://127.0.0.1:8181/api/management/v1/principal-roles \ + -H "Authorization: Bearer $POLARIS_TOKEN" \ + -H "Polaris-Request-Id: 1234" + curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo \ + -H "Authorization: Bearer $POLARIS_TOKEN" \ + -H "Polaris-Request-Id: 5678" + ``` + +5. Access the following services: + + - Prometheus UI: browse to http://localhost:9093 to view metrics. + - Jaeger UI: browse to http://localhost:16686 to view traces. diff --git a/getting-started/telemetry/docker-compose.yml b/getting-started/telemetry/docker-compose.yml new file mode 100644 index 0000000000..a9272e6cfd --- /dev/null +++ b/getting-started/telemetry/docker-compose.yml @@ -0,0 +1,93 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + + polaris: + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Management port (metrics and health checks) + - "8182:8182" + depends_on: + jaeger: + condition: service_healthy + environment: + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + quarkus.otel.exporter.otlp.endpoint: http://jaeger:4317 + # Resource attributes (added to all traces) + quarkus.otel.resource.attributes[0]: polaris.app=polaris-getting-started + quarkus.otel.resource.attributes[1]: polaris.env=dev + # Custom metrics (added to all requests) + polaris.metrics.tags.app: polaris-getting-started + polaris.metrics.tags.env: dev + # Custom logging (added to all log messages) + polaris.log.mdc.app: polaris-getting-started + polaris.log.mdc.env: dev + quarkus.log.console.format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{app},%X{env}] [%X{realmId},%X{requestId}] [%X{traceId},%X{spanId}] (%t) %s%e%n" + quarkus.log.file.format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{app},%X{env}] [%X{realmId},%X{requestId}] [%X{traceId},%X{spanId}] (%t) %s%e%n" + polaris.realm-context.realms: POLARIS + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/polaris/:/polaris + entrypoint: '/bin/sh -c "chmod +x 
/polaris/create-catalog.sh && /polaris/create-catalog.sh"' + + prometheus: + image: docker.io/prom/prometheus:v3.1.0 + ports: + - "9093:9090" + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/prometheus/:/etc/prometheus/ + command: + - --config.file=/etc/prometheus/prometheus.yml + healthcheck: + test: "wget -O /dev/null -o /dev/null http://localhost:9090" + interval: 5s + timeout: 2s + retries: 15 + + # Jaeger (OpenTelemetry traces collector) + jaeger: + image: docker.io/jaegertracing/all-in-one:1.65.0 + ports: + # Jaeger gRPC collector, used by Polaris + - "4317:4317" + # Jaeger UI + - "16686:16686" + environment: + - COLLECTOR_OTLP_ENABLED=true + healthcheck: + test: "echo -e 'GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n' | nc localhost 16686 | grep -q '200 OK'" + interval: 5s + timeout: 2s + retries: 15