From 3f739d0514a97b5ee7800d525629b84dab175267 Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Thu, 10 Apr 2025 14:14:27 -0700 Subject: [PATCH 01/21] First Draft with AWS --- .../assets/cloud_providers/deploy-aws.sh | 76 ++++++ .../assets/eclipselink/persistence.xml | 3 +- .../assets/polaris/create-catalog.sh | 4 +- getting-started/eclipselink/README.md | 16 +- .../eclipselink/docker-compose-postgres.yml | 61 +++++ .../eclipselink/docker-compose.yml | 44 +--- .../trino-config/catalog/iceberg.properties | 28 +++ .../unreleased/quickstart-deploy-aws.md | 45 ++++ .../quickstart-deployment-parent.md | 27 ++ .../unreleased/quickstart-installation.md | 116 +++++++++ .../unreleased/quickstart-local-deployment.md | 119 +++++++++ ...ckstart.md => quickstart-using-polaris.md} | 231 +++++++----------- 12 files changed, 583 insertions(+), 187 deletions(-) create mode 100644 getting-started/assets/cloud_providers/deploy-aws.sh create mode 100644 getting-started/eclipselink/docker-compose-postgres.yml create mode 100644 getting-started/eclipselink/trino-config/catalog/iceberg.properties create mode 100644 site/content/in-dev/unreleased/quickstart-deploy-aws.md create mode 100644 site/content/in-dev/unreleased/quickstart-deployment-parent.md create mode 100644 site/content/in-dev/unreleased/quickstart-installation.md create mode 100644 site/content/in-dev/unreleased/quickstart-local-deployment.md rename site/content/in-dev/unreleased/{quickstart.md => quickstart-using-polaris.md} (58%) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh new file mode 100644 index 0000000000..aa922ebad7 --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id) + +DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ + --instance-ids $EC2_INSTANCE_ID \ + --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \ + --output json) + +CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") + +CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') + +ALL_SUBNETS=$(aws ec2 describe-subnets \ + --region $CURRENT_REGION \ + --query 'Subnets[*].{SubnetId:SubnetId}' \ + --output json \ + | jq -r '[.[]["SubnetId"]] | join(" ")') + +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8) + +aws rds create-db-subnet-group \ + --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ + --db-subnet-group-description "Apache Polaris Quickstart DB Subnet Group" \ + --subnet-ids $ALL_SUBNETS + +DB_INSTANCE_INFO=$(aws rds create-db-instance \ + --db-instance-identifier polaris-backend-test-$RANDOM_SUFFIX \ + --db-instance-class db.t3.micro \ + --engine postgres \ + --master-username postgres \ + --master-user-password postgres \ + --db-name POLARIS \ + --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ + --allocated-storage 10) + +DB_ARN=$(echo $DB_INSTANCE_INFO | jq -r '.["DBInstance"]["DBInstanceArn"]') + +DESCRIBE_DB=$(aws rds describe-db-instances --db-instance-identifier $DB_ARN) + +until echo $DESCRIBE_DB | 
jq -e '.["DBInstances"][0] | has("Endpoint")'; +do + echo "sleeping 10s to wait for Postgres DB provisioning..." + sleep 10 + DESCRIBE_DB=$(aws rds describe-db-instances --db-instance-identifier $DB_ARN) +done + +POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq -r '"\(.Address):\(.Port)"') + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose.yml up \ No newline at end of file diff --git a/getting-started/assets/eclipselink/persistence.xml b/getting-started/assets/eclipselink/persistence.xml index e569a91832..54fb795a39 100644 --- a/getting-started/assets/eclipselink/persistence.xml +++ b/getting-started/assets/eclipselink/persistence.xml @@ -32,8 +32,7 @@ org.apache.polaris.jpa.models.ModelSequenceId NONE - + diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index f069c66376..048b32836a 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -41,11 +41,11 @@ curl -s -H "Authorization: Bearer ${token}" \ http://polaris:8181/api/management/v1/catalogs \ -d '{ "catalog": { - "name": "polaris_demo", + "name": "quickstart_catalog", "type": "INTERNAL", "readOnly": false, "properties": { - "default-base-location": "file:///tmp/polaris/" + "default-base-location": "file:///tmp/quickstart_catalog/" }, "storageConfigInfo": { "storageType": "FILE", diff --git a/getting-started/eclipselink/README.md 
b/getting-started/eclipselink/README.md index 53618b415a..a85565104c 100644 --- a/getting-started/eclipselink/README.md +++ b/getting-started/eclipselink/README.md @@ -32,13 +32,19 @@ This example requires `jq` to be installed on your machine. --no-build-cache ``` -2. Start the docker compose group by running the following command from the root of the repository: +2. Start the database docker compose group by running the following command from the root of the repository: + + ```shell + docker compose -f getting-started/eclipselink/docker-compose-postgres.yml up + ``` + +3. Start the docker compose group by running the following command from the root of the repository: ```shell docker compose -f getting-started/eclipselink/docker-compose.yml up ``` -3. Using spark-sql: attach to the running spark-sql container: +4. Using spark-sql: attach to the running spark-sql container: ```shell docker attach $(docker ps -q --filter name=spark-sql) @@ -54,7 +60,7 @@ This example requires `jq` to be installed on your machine. SELECT * FROM table1; ``` -4. To access Polaris from the host machine, first request an access token: +5. To access Polaris from the host machine, first request an access token: ```shell export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ @@ -64,9 +70,9 @@ This example requires `jq` to be installed on your machine. -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) ``` -5. Then, use the access token in the Authorization header when accessing Polaris: +6. 
Then, use the access token in the Authorization header when accessing Polaris: ```shell curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" - curl -v http://127.0.0.1:8181/api/catalog/v1/config?warehouse=polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" + curl -v http://127.0.0.1:8181/api/management/v1/catalogs/polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" ``` diff --git a/getting-started/eclipselink/docker-compose-postgres.yml b/getting-started/eclipselink/docker-compose-postgres.yml new file mode 100644 index 0000000000..35c724299e --- /dev/null +++ b/getting-started/eclipselink/docker-compose-postgres.yml @@ -0,0 +1,61 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +services: + polaris-bootstrap: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris-admin-tool:postgres-latest + depends_on: + postgres: + condition: service_healthy + environment: + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + command: + - "bootstrap" + - "--realm=POLARIS" + - "--credential=POLARIS,root,s3cr3t" + + postgres: + image: postgres:17.4 + ports: + - "5432:5432" + # set shared memory limit when using docker-compose + shm_size: 128mb + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: POLARIS + POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" + volumes: + # Bind local conf file to a convenient location in the container + - type: bind + source: ./postgresql.conf + target: /etc/postgresql/postgresql.conf + command: + - "postgres" + - "-c" + - "config_file=/etc/postgresql/postgresql.conf" + healthcheck: + test: "pg_isready -U postgres" + interval: 5s + timeout: 2s + retries: 15 \ No newline at end of file diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 252ea53d64..2dc7f92f4f 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -29,11 +29,6 @@ services: - "8182:8182" # Optional, allows attaching a debugger to the Polaris JVM - "5005:5005" - depends_on: - polaris-bootstrap: - condition: service_completed_successfully - postgres: - condition: service_healthy environment: JAVA_DEBUG: "true" JAVA_DEBUG_PORT: "*:5005" @@ -49,31 +44,6 @@ services: timeout: 10s retries: 10 - polaris-bootstrap: - # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions - image: 
apache/polaris-admin-tool:postgres-latest - depends_on: - postgres: - condition: service_healthy - environment: - polaris.persistence.type: eclipse-link - polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml - volumes: - - ../assets/eclipselink/:/deployments/config/eclipselink - command: - - "bootstrap" - - "--realm=POLARIS" - - "--credential=POLARIS,root,s3cr3t" - - polaris-setup: - image: alpine/curl - depends_on: - polaris: - condition: service_healthy - volumes: - - ../assets/polaris/:/polaris - entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' - postgres: image: postgres:17.4 ports: @@ -119,7 +89,7 @@ services: --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", --conf, "spark.sql.catalog.polaris.type=rest", - --conf, "spark.sql.catalog.polaris.warehouse=polaris_demo", + --conf, "spark.sql.catalog.polaris.warehouse=quickstart_catalog", --conf, "spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog", --conf, "spark.sql.catalog.polaris.credential=root:s3cr3t", --conf, "spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL", @@ -127,3 +97,15 @@ services: --conf, "spark.sql.catalogImplementation=in-memory", --conf, "spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp" ] + + trino: + image: trinodb/trino:latest + depends_on: + polaris-setup: + condition: service_completed_successfully + stdin_open: true + tty: true + ports: + - "8080:8080" + volumes: + - ./trino-config/catalog:/etc/trino/catalog diff --git a/getting-started/eclipselink/trino-config/catalog/iceberg.properties b/getting-started/eclipselink/trino-config/catalog/iceberg.properties new file mode 100644 index 0000000000..1cd0a0e7a2 --- /dev/null +++ b/getting-started/eclipselink/trino-config/catalog/iceberg.properties @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software 
Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +connector.name=iceberg +iceberg.catalog.type=rest +iceberg.rest-catalog.uri=http://polaris:8181/api/catalog +iceberg.rest-catalog.security=OAUTH2 +iceberg.rest-catalog.oauth2.credential=root:s3cr3t +iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL +iceberg.rest-catalog.warehouse=polaris +# Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration +fs.hadoop.enabled=true diff --git a/site/content/in-dev/unreleased/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/quickstart-deploy-aws.md new file mode 100644 index 0000000000..b30670183e --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-deploy-aws.md @@ -0,0 +1,45 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Deploying Polaris on AWS +type: docs +weight: 112 +--- + +Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start an [Amazon RDS for PostgreSQL](https://aws.amazon.com/rds/postgresql/) instance, which will be used as the backend Postgres instance holding all Polaris data. +Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino. + +The requirements to run the script below are: +* There must be at least two subnets created in the VPC and region in which your EC2 instance resides. The span of subnets MUST include at least 2 availability zones (AZs) within the same region. +* The AWS identity that you will use to run this script must have the following AWS permissions: + * "ec2:DescribeInstances" + * "rds:CreateDBInstance" + * "rds:DescribeDBInstances" + * "rds:CreateDBSubnetGroup" + +```shell + +``` + +Also, set the following static credentials for interacting with the Polaris server in the following exercises: + +```shell +export CLIENT_ID=root +export CLIENT_SECRET=s3cr3t +``` \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-deployment-parent.md b/site/content/in-dev/unreleased/quickstart-deployment-parent.md new file mode 100644 index 0000000000..24b26cad50 --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-deployment-parent.md @@ -0,0 +1,27 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Deploying Polaris +type: docs +weight: 110 +--- + +In this Quick Start flow, we will demonstrate how to deploy Polaris locally, as well as with all supported Cloud Providers: Amazon Web Services (AWS), Azure, and Google Cloud Platform (GCP). + +Locally, Polaris can be deployed using both Docker and local build. On the cloud, this tutorial will deploy Polaris using Docker only - but local builds can also be executed. \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-installation.md b/site/content/in-dev/unreleased/quickstart-installation.md new file mode 100644 index 0000000000..1dd60f6733 --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-installation.md @@ -0,0 +1,116 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Quick Start +type: docs +weight: 100 +--- + +This guide serves as an introduction to several key entities that can be managed with Apache Polaris (Incubating), describes how to build and deploy Polaris locally, and finally includes examples of how to use Polaris with Apache Spark™. + +## Prerequisites + +This guide covers building Polaris, deploying it locally or via [Docker](https://www.docker.com/), and interacting with it using the command-line interface and [Apache Spark](https://spark.apache.org/). Before proceeding with Polaris, be sure to satisfy the relevant prerequisites listed here. + +### Git + +To get the latest Polaris code, you'll need to clone the repository using [git](https://git-scm.com/). You can install git using [homebrew](https://brew.sh/) on MacOS: + +```shell +brew install git +``` + +Please follow instructions from the [Git Documentation](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) for instructions on installing Git on other platforms. + +Then, use git to clone the Polaris repo: + +```shell +cd ~ +git clone https://github.com/apache/polaris.git +``` + +### Docker + +It is recommended to deploy Polaris inside [Docker](https://www.docker.com/) for the Quick Start workflow. Instructions for deploying the Quick Start workflow on the supported Cloud Providers (AWS, Azure, GCP) will be provided only with Docker. However, non-Docker deployment instructions for local deployments can also be followed on Cloud Providers. 
+ +Instructions to install Docker can be found on the [Docker website](https://docs.docker.com/engine/install/). Ensure that Docker and the Docker Compose plugin are both installed. + +#### Docker on MacOS +Docker can be installed using [homebrew](https://brew.sh/): + +```shell +brew install --cask docker +``` + +There could be [Docker permission issues](https://github.com/apache/polaris/pull/971) related to seccomp configuration. To resolve these issues, set the `seccomp` profile to "unconfined" when running a container. For example: + +```shell +docker run --security-opt seccomp=unconfined apache/polaris:latest +``` + +Note: Setting the seccomp profile to "unconfined" disables the default system call filtering, which may pose security risks. Use this configuration with caution, especially in production environments. + +#### Docker on Amazon Linux +Docker can be installed using a modification to the CentOS instructions. For example: + +```shell +sudo dnf update -y +# Remove old version +sudo dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine +# Install dnf plugin +sudo dnf -y install dnf-plugins-core +# Add CentOS repository +sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo +# Adjust release server version in the path as it will not match with Amazon Linux 2023 +sudo sed -i 's/$releasever/9/g' /etc/yum.repos.d/docker-ce.repo +# Install as usual +sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +``` + +#### Confirm Docker Installation + +Once installed, make sure that both Docker and the Docker Compose plugin are installed: + +```shell +docker version +docker compose version +``` + +Also make sure Docker is running and is able to run a sample Docker container: + +```shell +docker run hello-world +``` + +#### Java + +If you plan to build Polaris from source yourself or using this
tutorial's instructions on a Cloud Provider, you will need to satisfy a few prerequisites first. + +Polaris is built using [gradle](https://gradle.org/) and is compatible with Java 21. We recommend the use of [jenv](https://www.jenv.be/) to manage multiple Java versions. For example, to install Java 21 via [homebrew](https://brew.sh/) and configure it with jenv: + +```shell +cd ~/polaris +brew install openjdk@21 jenv +jenv add $(brew --prefix openjdk@21) +jenv local 21 +``` + +#### jq + +Most Polaris Quickstart scripts require `jq`. Follow the instructions from the [jq](https://jqlang.org/download/) website to download this tool. \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-local-deployment.md b/site/content/in-dev/unreleased/quickstart-local-deployment.md new file mode 100644 index 0000000000..811eb44d10 --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-local-deployment.md @@ -0,0 +1,119 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Deploying Polaris Locally +type: docs +weight: 111 +--- + +Polaris can be deployed via a docker image or as a standalone process. 
Before starting, be sure that you've satisfied the relevant prerequisites detailed in the parent page. + +## Docker Image + +To start using Polaris in Docker, build and launch Polaris, which is packaged with a Postgres instance, Apache Spark, and Trino. + +```shell +cd ~/polaris +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache +docker compose -f getting-started/eclipselink/docker-compose.yml up +``` + +You should see output for some time as Polaris, Spark, and Trino build and start up. Eventually, the output will settle and the most recent logs will relate to Spark, resembling the following: + +``` +spark-sql-1 | Spark Web UI available at http://8bc4de8ed854:4040 +spark-sql-1 | Spark master: local[*], Application Id: local-1743745174604 +spark-sql-1 | 25/04/04 05:39:38 WARN SparkSQLCLIDriver: WARNING: Directory for Hive history file: /home/spark does not exist. History will not be available during this session. +spark-sql-1 | 25/04/04 05:39:39 WARN RESTSessionCatalog: Iceberg REST client is missing the OAuth2 server URI configuration and defaults to http://polaris:8181/api/catalogv1/oauth/tokens. This automatic fallback will be removed in a future Iceberg release.It is recommended to configure the OAuth2 endpoint using the 'oauth2-server-uri' property to be prepared. This warning will disappear if the OAuth2 endpoint is explicitly configured. See https://github.com/apache/iceberg/issues/10537 +``` + +Finally, set the following static credentials for interacting with the Polaris server in the following exercises: + +```shell +export CLIENT_ID=root +export CLIENT_SECRET=s3cr3t +``` + +The Docker image pre-configures a sample catalog called `quickstart_catalog` that uses a local file system.
+ +## Running Polaris as a Standalone Process + +The easiest way to run Polaris locally is to start the Polaris server from the +`quarkus/server/build` directory (after building Polaris): + +```shell +cd ~/polaris +# Build the server +./gradlew clean :polaris-quarkus-server:assemble +# Start the server +java -jar quarkus/server/build/quarkus-app/quarkus-run.jar +``` + +You should see output for some time as Polaris builds and starts up. Eventually, you won’t see any more logs and should see messages that resemble the following: + +``` +realm: root principal credentials: : +INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) polaris-quarkus-service on JVM (powered by Quarkus ) started in 2.656s. Listening on: http://localhost:8181. Management interface listening on http://0.0.0.0:8182. +INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Profile prod activated. Live Coding activated. +INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Installed features: [...] +``` + +At this point, Polaris is running. + +For this tutorial, we'll launch an instance of Polaris that stores entities only in-memory. This means that any entities that you define will be destroyed when Polaris is shut down. It also means that Polaris will automatically bootstrap itself with root credentials. For more information on how to configure Polaris for production usage, see the [docs]({{% ref "configuring-polaris-for-production" %}}). + +When Polaris is launched using an in-memory metastore, the root principal credentials can be found +in stdout on initial startup. Look for a line that resembles the following: + +``` +realm: root principal credentials: : +``` + +Be sure to take note of these credentials as we'll be using them below. 
You can also set these credentials as environment variables for use with the Polaris CLI: + +```shell +export CLIENT_ID= +export CLIENT_SECRET= +``` + +### Installing Apache Spark and Trino Locally for Testing + +#### Apache Spark + +If you want to connect to Polaris with [Apache Spark](https://spark.apache.org/), you'll need to start by cloning Spark. As in the [prerequisites]({{% ref "quickstart-installation#Prerequisites" %}}), make sure [git](https://git-scm.com/) is installed first. + +Then, clone Spark and check out a versioned branch. This guide uses [Spark 3.5](https://spark.apache.org/releases/spark-release-3-5-0.html). + +```shell +cd ~ +git clone https://github.com/apache/spark.git +cd ~/spark +git checkout branch-3.5 +``` + +#### Trino +If you want to connect to Polaris with [Trino](https://trino.io/), it is recommended to set up Trino using Docker. As in the [prerequisites]({{% ref "quickstart-installation#Prerequisites" %}}), make sure [Docker](https://www.docker.com/) is installed first. + +```shell +docker run --name trino -d -p 8080:8080 trinodb/trino +``` \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart.md b/site/content/in-dev/unreleased/quickstart-using-polaris.md similarity index 58% rename from site/content/in-dev/unreleased/quickstart.md rename to site/content/in-dev/unreleased/quickstart-using-polaris.md index 9ab93c7fec..8054564b1b 100644 --- a/site/content/in-dev/unreleased/quickstart.md +++ b/site/content/in-dev/unreleased/quickstart-using-polaris.md @@ -17,146 +17,15 @@ # specific language governing permissions and limitations # under the License. # -Title: Quick Start +Title: Using Polaris type: docs -weight: 100 +weight: 120 --- -This guide serves as a introduction to several key entities that can be managed with Apache Polaris (Incubating), describes how to build and deploy Polaris locally, and finally includes examples of how to use Polaris with Apache Spark™.
- -## Prerequisites - -This guide covers building Polaris, deploying it locally or via [Docker](https://www.docker.com/), and interacting with it using the command-line interface and [Apache Spark](https://spark.apache.org/). Before proceeding with Polaris, be sure to satisfy the relevant prerequisites listed here. - -### Building and Deploying Polaris - -To get the latest Polaris code, you'll need to clone the repository using [git](https://git-scm.com/). You can install git using [homebrew](https://brew.sh/): - -```shell -brew install git -``` - -Then, use git to clone the Polaris repo: - -```shell -cd ~ -git clone https://github.com/apache/polaris.git -``` - -#### With Docker - -If you plan to deploy Polaris inside [Docker](https://www.docker.com/), you'll need to install docker itself. For example, this can be done using [homebrew](https://brew.sh/): - -```shell -brew install --cask docker -``` - -There could be a [Docker permission issues](https://github.com/apache/polaris/pull/971) related to seccomp configuration. To resolve these issues, set the `seccomp` profile to "unconfined" when running a container. For example: - -```shell -docker run --security-opt seccomp=unconfined apache/polaris:latest -``` - -Note: Setting the seccomp profile to "unconfined" disables the default system call filtering, which may pose security risks. Use this configuration with caution, especially in production environments. - -Once installed, make sure Docker is running. - -#### From Source - -If you plan to build Polaris from source yourself, you will need to satisfy a few prerequisites first. - -Polaris is built using [gradle](https://gradle.org/) and is compatible with Java 21. We recommend the use of [jenv](https://www.jenv.be/) to manage multiple Java versions. 
For example, to install Java 21 via [homebrew](https://brew.sh/) and configure it with jenv: - -```shell -cd ~/polaris -brew install openjdk@21 jenv -jenv add $(brew --prefix openjdk@21) -jenv local 21 -``` - ### Connecting to Polaris Polaris is compatible with any [Apache Iceberg](https://iceberg.apache.org/) client that supports the REST API. Depending on the client you plan to use, refer to the prerequisites below. -#### With Spark - -If you want to connect to Polaris with [Apache Spark](https://spark.apache.org/), you'll need to start by cloning Spark. As [above](#building-and-deploying-polaris), make sure [git](https://git-scm.com/) is installed first. You can install it with [homebrew](https://brew.sh/): - -```shell -brew install git -``` - -Then, clone Spark and check out a versioned branch. This guide uses [Spark 3.5](https://spark.apache.org/releases/spark-release-3-5-0.html). - -```shell -cd ~ -git clone https://github.com/apache/spark.git -cd ~/spark -git checkout branch-3.5 -``` - -## Deploying Polaris - -Polaris can be deployed via a lightweight docker image or as a standalone process. Before starting, be sure that you've satisfied the relevant [prerequisites](#building-and-deploying-polaris) detailed above. - -### Docker Image - -To start using Polaris in Docker, launch Polaris while Docker is running: - -```shell -cd ~/polaris -./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache -docker run -p 8181:8181 -p 8182:8182 apache/polaris:latest -``` - -You should see output for some time as Polaris builds and starts up. Eventually, you won’t see any more logs and should see messages that resemble the following: - -``` -INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Apache Polaris Server on JVM (powered by Quarkus ) started in 2.656s. Listening on: http://localhost:8181. Management interface listening on http://0.0.0.0:8182. -INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Profile prod activated. 
-INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Installed features: [...] -``` - -### Running Polaris as a Standalone Process - -The easiest way to run Polaris locally is to start the Polaris server from the -`quarkus/server/build` directory (after building Polaris): - -```shell -cd ~/polaris -# Build the server -./gradlew clean :polaris-quarkus-server:assemble -# Start the server -java -jar quarkus/server/build/quarkus-app/quarkus-run.jar -``` - -You should see output for some time as Polaris builds and starts up. Eventually, you won’t see any more logs and should see messages that resemble the following: - -``` -realm: root principal credentials: : -INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) polaris-quarkus-service on JVM (powered by Quarkus ) started in 2.656s. Listening on: http://localhost:8181. Management interface listening on http://0.0.0.0:8182. -INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Profile prod activated. Live Coding activated. -INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Installed features: [...] -``` - -At this point, Polaris is running. - -For this tutorial, we'll launch an instance of Polaris that stores entities only in-memory. This means that any entities that you define will be destroyed when Polaris is shut down. It also means that Polaris will automatically bootstrap itself with root credentials. For more information on how to configure Polaris for production usage, see the [docs]({{% ref "configuring-polaris-for-production" %}}). - -When Polaris is launched using an in-memory metastore, the root principal credentials can be found -in stdout on initial startup. Look for a line that resembles the following: - -``` -realm: root principal credentials: : -``` - -Be sure to take note of these credentials as we'll be using them below. 
You can also set these credentials as environment variables for use with the Polaris CLI: - -```shell -export CLIENT_ID= -export CLIENT_SECRET= -``` - ## Defining a Catalog In Polaris, the [catalog]({{% ref "entities#catalog" %}}) is the top-level entity that objects like [tables]({{% ref "entities#table" %}}) and [views]({{% ref "entities#view" %}}) are organized under. With a Polaris service running, you can create a catalog like so: @@ -272,6 +141,8 @@ At this point, we’ve created a principal and granted it the ability to manage ### Connecting with Spark +#### Using a Local Build of Spark + To use a Polaris-managed catalog in [Apache Spark](https://spark.apache.org/), we can configure Spark to use the Iceberg catalog REST API. This guide uses [Apache Spark 3.5](https://spark.apache.org/releases/spark-release-3-5-0.html), but be sure to find [the appropriate iceberg-spark package for your Spark version](https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-spark). From a local Spark clone on the `branch-3.5` branch we can run the following: @@ -279,7 +150,7 @@ This guide uses [Apache Spark 3.5](https://spark.apache.org/releases/spark-relea _Note: the credentials provided here are those for our principal, not the root credentials._ ```shell -bin/spark-shell \ +bin/spark-sql \ --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.1,org.apache.hadoop:hadoop-aws:3.4.0 \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.quickstart_catalog.warehouse=quickstart_catalog \ @@ -300,26 +171,31 @@ Similar to the CLI commands above, this configures Spark to use the Polaris runn Finally, note that we include the `hadoop-aws` package here. If your table is using a different filesystem, be sure to include the appropriate dependency. 
-Once the Spark session starts, we can create a namespace and table within the catalog: +#### Using Spark SQL from a Docker container + +Attach to the running spark-sql container: +```shell +docker attach $(docker ps -q --filter name=spark-sql) ``` -spark.sql("USE quickstart_catalog") -spark.sql("CREATE NAMESPACE IF NOT EXISTS quickstart_namespace") -spark.sql("CREATE NAMESPACE IF NOT EXISTS quickstart_namespace.schema") -spark.sql("USE NAMESPACE quickstart_namespace.schema") -spark.sql(""" -CREATE TABLE IF NOT EXISTS quickstart_table ( - id BIGINT, data STRING -) -USING ICEBERG -""") + +#### Sample Commands + +Once the Spark session starts, we can create a namespace and table within the catalog: + +```sql +USE polaris; +CREATE NAMESPACE IF NOT EXISTS quickstart_namespace; +CREATE NAMESPACE IF NOT EXISTS quickstart_namespace.schema; +USE NAMESPACE quickstart_namespace.schema; +CREATE TABLE IF NOT EXISTS quickstart_table (id BIGINT, data STRING) USING ICEBERG; ``` We can now use this table like any other: ``` -spark.sql("INSERT INTO quickstart_table VALUES (1, 'some data')") -spark.sql("SELECT * FROM quickstart_table").show(false) +INSERT INTO quickstart_table VALUES (1, 'some data'); +SELECT * FROM quickstart_table; . . . +---+---------+ |id |data | @@ -349,3 +225,64 @@ spark.sql("SELECT * FROM quickstart_table").show(false) org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'quickstart_user' with activated PrincipalRoles '[]' and activated grants via '[quickstart_catalog_role, quickstart_user_role]' is not authorized for op LOAD_TABLE_WITH_READ_DELEGATION ``` + +### Connecting with Trino + +Attach to the running Trino container: + +```shell +docker exec -it trino-trino-1 trino +``` + +You may not see Trino's prompt immediately, type ENTER to see it. 
A few commands that you can try: + +```sql +SHOW CATALOGS; +SHOW SCHEMAS FROM iceberg; +CREATE SCHEMA iceberg.quickstart_schema; +CREATE TABLE iceberg.quickstart_schema.quickstart_table AS SELECT 1 x; +SELECT * FROM iceberg.quickstart_schema.quickstart_table; +``` + +If at any time access is revoked... + +```shell +./polaris \ + --client-id ${CLIENT_ID} \ + --client-secret ${CLIENT_SECRET} \ + privileges \ + catalog \ + revoke \ + --catalog quickstart_catalog \ + --catalog-role quickstart_catalog_role \ + CATALOG_MANAGE_CONTENT +``` + +Trino will lose access to the table: + +```sql +SELECT * FROM quickstart_table; + +org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'quickstart_user' with activated PrincipalRoles '[]' and activated grants via '[quickstart_catalog_role, quickstart_user_role]' is not authorized for op LOAD_TABLE_WITH_READ_DELEGATION +``` + +### Connecting Using REST APIs + +To access Polaris from the host machine, first request an access token: + +```shell +export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ + --resolve polaris:8181:127.0.0.1 \ + --user ${CLIENT_ID}:${CLIENT_SECRET} \ + -d 'grant_type=client_credentials' \ + -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) +``` + +Then, use the access token in the Authorization header when accessing Polaris: + +```shell +curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" +curl -v http://127.0.0.1:8181/api/management/v1/catalogs/polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" +``` + + From cf01d00664b4d230e06bcb63805731daa52944d4 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 14:35:16 -0700 Subject: [PATCH 02/21] try again --- .../assets/cloud_providers/deploy-aws.sh | 2 ++ .../eclipselink/docker-compose.yml | 29 ------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh 
b/getting-started/assets/cloud_providers/deploy-aws.sh index aa922ebad7..8cfd5ea30f 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -73,4 +73,6 @@ sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES -Dquarkus.container-image.build=true \ --no-build-cache + + docker compose -f getting-started/eclipselink/docker-compose.yml up \ No newline at end of file diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 2dc7f92f4f..bae9b3d1d3 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -44,32 +44,6 @@ services: timeout: 10s retries: 10 - postgres: - image: postgres:17.4 - ports: - - "5432:5432" - # set shared memory limit when using docker-compose - shm_size: 128mb - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: POLARIS - POSTGRES_INITDB_ARGS: "--encoding UTF8 --data-checksums" - volumes: - # Bind local conf file to a convenient location in the container - - type: bind - source: ./postgresql.conf - target: /etc/postgresql/postgresql.conf - command: - - "postgres" - - "-c" - - "config_file=/etc/postgresql/postgresql.conf" - healthcheck: - test: "pg_isready -U postgres" - interval: 5s - timeout: 2s - retries: 15 - spark-sql: image: apache/spark:3.5.5-java17-python3 depends_on: @@ -100,9 +74,6 @@ services: trino: image: trinodb/trino:latest - depends_on: - polaris-setup: - condition: service_completed_successfully stdin_open: true tty: true ports: From 08eebebe962983f1f8f398095ffb431e6a061432 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 14:38:28 -0700 Subject: [PATCH 03/21] try again --- getting-started/eclipselink/docker-compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 
bae9b3d1d3..3b991a0b14 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -46,9 +46,6 @@ services: spark-sql: image: apache/spark:3.5.5-java17-python3 - depends_on: - polaris-setup: - condition: service_completed_successfully stdin_open: true tty: true ports: From 79f741623535b07f4ef1d1969e383a5c13d532c3 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 15:44:27 -0700 Subject: [PATCH 04/21] try again --- .../assets/cloud_providers/deploy-aws.sh | 4 +-- getting-started/eclipselink/README.md | 16 +++------ .../docker-compose-bootstrap-db.yml | 35 +++++++++++++++++++ .../eclipselink/docker-compose-postgres.yml | 16 --------- 4 files changed, 41 insertions(+), 30 deletions(-) create mode 100644 getting-started/eclipselink/docker-compose-bootstrap-db.yml diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 8cfd5ea30f..f07bfbb9f5 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -73,6 +73,4 @@ sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES -Dquarkus.container-image.build=true \ --no-build-cache - - -docker compose -f getting-started/eclipselink/docker-compose.yml up \ No newline at end of file +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up \ No newline at end of file diff --git a/getting-started/eclipselink/README.md b/getting-started/eclipselink/README.md index a85565104c..451f9cbe74 100644 --- a/getting-started/eclipselink/README.md +++ b/getting-started/eclipselink/README.md @@ -32,19 +32,13 @@ This example requires `jq` to be installed on your machine. --no-build-cache ``` -2. 
Start the database docker compose group by running the following command from the root of the repository: - - ```shell - docker compose -f getting-started/eclipselink/docker-compose-postgres.yml up - ``` - -3. Start the docker compose group by running the following command from the root of the repository: +2. Start the docker compose group by running the following command from the root of the repository: ```shell - docker compose -f getting-started/eclipselink/docker-compose.yml up + docker compose -f getting-started/eclipselink/docker-compose-postgres.yml -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up ``` -4. Using spark-sql: attach to the running spark-sql container: +3. Using spark-sql: attach to the running spark-sql container: ```shell docker attach $(docker ps -q --filter name=spark-sql) @@ -60,7 +54,7 @@ This example requires `jq` to be installed on your machine. SELECT * FROM table1; ``` -5. To access Polaris from the host machine, first request an access token: +4. To access Polaris from the host machine, first request an access token: ```shell export POLARIS_TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ @@ -70,7 +64,7 @@ This example requires `jq` to be installed on your machine. -d 'scope=PRINCIPAL_ROLE:ALL' | jq -r .access_token) ``` -6. Then, use the access token in the Authorization header when accessing Polaris: +5. 
Then, use the access token in the Authorization header when accessing Polaris: ```shell curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" diff --git a/getting-started/eclipselink/docker-compose-bootstrap-db.yml b/getting-started/eclipselink/docker-compose-bootstrap-db.yml new file mode 100644 index 0000000000..d7a36e3d18 --- /dev/null +++ b/getting-started/eclipselink/docker-compose-bootstrap-db.yml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +services: + polaris-bootstrap: + # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions + image: apache/polaris-admin-tool:postgres-latest + depends_on: + postgres: + condition: service_healthy + environment: + polaris.persistence.type: eclipse-link + polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml + volumes: + - ../assets/eclipselink/:/deployments/config/eclipselink + command: + - "bootstrap" + - "--realm=POLARIS" + - "--credential=POLARIS,root,s3cr3t" \ No newline at end of file diff --git a/getting-started/eclipselink/docker-compose-postgres.yml b/getting-started/eclipselink/docker-compose-postgres.yml index 35c724299e..1e86a8c0ea 100644 --- a/getting-started/eclipselink/docker-compose-postgres.yml +++ b/getting-started/eclipselink/docker-compose-postgres.yml @@ -18,22 +18,6 @@ # services: - polaris-bootstrap: - # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions - image: apache/polaris-admin-tool:postgres-latest - depends_on: - postgres: - condition: service_healthy - environment: - polaris.persistence.type: eclipse-link - polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml - volumes: - - ../assets/eclipselink/:/deployments/config/eclipselink - command: - - "bootstrap" - - "--realm=POLARIS" - - "--credential=POLARIS,root,s3cr3t" - postgres: image: postgres:17.4 ports: From 3d3d50646f4e25c46456e589d646b5b021315b5f Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 15:51:58 -0700 Subject: [PATCH 05/21] try again --- getting-started/eclipselink/docker-compose-bootstrap-db.yml | 3 --- site/content/in-dev/unreleased/quickstart-deploy-aws.md | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/getting-started/eclipselink/docker-compose-bootstrap-db.yml 
b/getting-started/eclipselink/docker-compose-bootstrap-db.yml index d7a36e3d18..6e1ab80751 100644 --- a/getting-started/eclipselink/docker-compose-bootstrap-db.yml +++ b/getting-started/eclipselink/docker-compose-bootstrap-db.yml @@ -21,9 +21,6 @@ services: polaris-bootstrap: # IMPORTANT: the image MUST contain the Postgres JDBC driver and EclipseLink dependencies, see README for instructions image: apache/polaris-admin-tool:postgres-latest - depends_on: - postgres: - condition: service_healthy environment: polaris.persistence.type: eclipse-link polaris.persistence.eclipselink.configuration-file: /deployments/config/eclipselink/persistence.xml diff --git a/site/content/in-dev/unreleased/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/quickstart-deploy-aws.md index b30670183e..56bbb35e9c 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/quickstart-deploy-aws.md @@ -42,4 +42,10 @@ Also, set the following static credentials for interacting with the Polaris serv ```shell export CLIENT_ID=root export CLIENT_SECRET=s3cr3t +``` + +To take down the Polaris server, run the following commands: + +```shell +docker compose -f getting-started/eclipselink/docker-compose.yml down ``` \ No newline at end of file From 66826dd1ce806baaf336b3aee5b0f0b01393945b Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 15:59:27 -0700 Subject: [PATCH 06/21] try again --- getting-started/eclipselink/docker-compose.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 3b991a0b14..b0f31e3e6a 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -44,6 +44,15 @@ services: timeout: 10s retries: 10 + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + volumes: + - ../assets/polaris/:/polaris + entrypoint: 
'/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' + spark-sql: image: apache/spark:3.5.5-java17-python3 stdin_open: true From b458e68308997925d64f5b0d3bb93d5aaca5603c Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 17:47:14 -0700 Subject: [PATCH 07/21] try now --- .../assets/polaris/create-catalog.sh | 4 +-- .../unreleased/quickstart-using-polaris.md | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 048b32836a..7416c7dbec 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -33,7 +33,7 @@ echo echo "Obtained access token: ${token}" echo -echo Creating a catalog named polaris_demo... +echo Creating a catalog named quickstart_catalog... curl -s -H "Authorization: Bearer ${token}" \ -H 'Accept: application/json' \ @@ -45,7 +45,7 @@ curl -s -H "Authorization: Bearer ${token}" \ "type": "INTERNAL", "readOnly": false, "properties": { - "default-base-location": "file:///tmp/quickstart_catalog/" + "default-base-location": "file:///var/tmp/quickstart_catalog/" }, "storageConfigInfo": { "storageType": "FILE", diff --git a/site/content/in-dev/unreleased/quickstart-using-polaris.md b/site/content/in-dev/unreleased/quickstart-using-polaris.md index 8054564b1b..e5d482e65c 100644 --- a/site/content/in-dev/unreleased/quickstart-using-polaris.md +++ b/site/content/in-dev/unreleased/quickstart-using-polaris.md @@ -173,6 +173,15 @@ Finally, note that we include the `hadoop-aws` package here. 
If your table is us #### Using Spark SQL from a Docker container +Replace the credentials used in the Docker container using the following code: + +```shell +USER_CLIENT_ID="XXXX" +USER_CLIENT_SECRET="YYYY" +sed -i "s/^\(.*spark\.sql\.catalog\.polaris\.credential=\).*/\1${USER_CLIENT_ID}:${USER_CLIENT_SECRET}\",/" getting-started/eclipselink/docker-compose.yml +docker compose -f getting-started/eclipselink/docker-compose.yml up -d +``` + Attach to the running spark-sql container: ```shell @@ -221,17 +230,27 @@ If at any time access is revoked... Spark will lose access to the table: ``` -spark.sql("SELECT * FROM quickstart_table").show(false) +SELECT * FROM quickstart_table; org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'quickstart_user' with activated PrincipalRoles '[]' and activated grants via '[quickstart_catalog_role, quickstart_user_role]' is not authorized for op LOAD_TABLE_WITH_READ_DELEGATION ``` ### Connecting with Trino +Replace the credentials used in the Docker container using the following code: + +```shell +USER_CLIENT_ID="XXXX" +USER_CLIENT_SECRET="YYYY" +sed -i "s/^\(iceberg\.rest-catalog\.oauth2\.credential=\).*/\1${USER_CLIENT_ID}:${USER_CLIENT_SECRET}/" getting-started/eclipselink/trino-config/catalog/iceberg.properties +docker compose -f getting-started/eclipselink/docker-compose.yml down trino +docker compose -f getting-started/eclipselink/docker-compose.yml up -d +``` + Attach to the running Trino container: ```shell -docker exec -it trino-trino-1 trino +docker exec -it eclipselink-trino-1 trino ``` You may not see Trino's prompt immediately, type ENTER to see it. A few commands that you can try: @@ -261,7 +280,7 @@ If at any time access is revoked... 
Trino will lose access to the table: ```sql -SELECT * FROM quickstart_table; +SELECT * FROM iceberg.quickstart_schema.quickstart_table; org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'quickstart_user' with activated PrincipalRoles '[]' and activated grants via '[quickstart_catalog_role, quickstart_user_role]' is not authorized for op LOAD_TABLE_WITH_READ_DELEGATION ``` From 569ccd81aa872c37a1aee0b84e9694fad18d30ff Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 10 Apr 2025 17:57:31 -0700 Subject: [PATCH 08/21] should work --- getting-started/assets/cloud_providers/deploy-aws.sh | 2 +- getting-started/assets/polaris/create-catalog.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index f07bfbb9f5..5dfcc26dce 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -73,4 +73,4 @@ sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES -Dquarkus.container-image.build=true \ --no-build-cache -docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up \ No newline at end of file +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 7416c7dbec..b7b8e0f51d 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -50,7 +50,7 @@ curl -s -H "Authorization: Bearer ${token}" \ "storageConfigInfo": { "storageType": "FILE", "allowedLocations": [ - "file:///tmp" + "file:///var/tmp" ] } } From cbca8f5d1057462681624a4bc8b4ba58efe9fb8f Mon Sep 17 00:00:00 2001 From: 
adnanhemani Date: Thu, 10 Apr 2025 18:51:36 -0700 Subject: [PATCH 09/21] AWS First Draft Complete --- .../eclipselink/trino-config/catalog/iceberg.properties | 2 +- site/content/in-dev/unreleased/quickstart-using-polaris.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/getting-started/eclipselink/trino-config/catalog/iceberg.properties b/getting-started/eclipselink/trino-config/catalog/iceberg.properties index 1cd0a0e7a2..28c3c61faa 100644 --- a/getting-started/eclipselink/trino-config/catalog/iceberg.properties +++ b/getting-started/eclipselink/trino-config/catalog/iceberg.properties @@ -23,6 +23,6 @@ iceberg.rest-catalog.uri=http://polaris:8181/api/catalog iceberg.rest-catalog.security=OAUTH2 iceberg.rest-catalog.oauth2.credential=root:s3cr3t iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL -iceberg.rest-catalog.warehouse=polaris +iceberg.rest-catalog.warehouse=quickstart_catalog # Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration fs.hadoop.enabled=true diff --git a/site/content/in-dev/unreleased/quickstart-using-polaris.md b/site/content/in-dev/unreleased/quickstart-using-polaris.md index e5d482e65c..4f7dea18b5 100644 --- a/site/content/in-dev/unreleased/quickstart-using-polaris.md +++ b/site/content/in-dev/unreleased/quickstart-using-polaris.md @@ -230,7 +230,7 @@ If at any time access is revoked... 
Spark will lose access to the table: ``` -SELECT * FROM quickstart_table; +INSERT INTO quickstart_table VALUES (1, 'some data'); org.apache.iceberg.exceptions.ForbiddenException: Forbidden: Principal 'quickstart_user' with activated PrincipalRoles '[]' and activated grants via '[quickstart_catalog_role, quickstart_user_role]' is not authorized for op LOAD_TABLE_WITH_READ_DELEGATION ``` @@ -301,7 +301,7 @@ Then, use the access token in the Authorization header when accessing Polaris: ```shell curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" -curl -v http://127.0.0.1:8181/api/management/v1/catalogs/polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" +curl -v http://127.0.0.1:8181/api/management/v1/catalogs/quickstart_catalog -H "Authorization: Bearer $POLARIS_TOKEN" ``` From ec4782b3195f90ee6105009b6bd767fcffc5883b Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Fri, 11 Apr 2025 16:58:53 -0700 Subject: [PATCH 10/21] ensure file changed --- .../eclipselink/trino-config/catalog/iceberg.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getting-started/eclipselink/trino-config/catalog/iceberg.properties b/getting-started/eclipselink/trino-config/catalog/iceberg.properties index 28c3c61faa..057c9950f2 100644 --- a/getting-started/eclipselink/trino-config/catalog/iceberg.properties +++ b/getting-started/eclipselink/trino-config/catalog/iceberg.properties @@ -23,6 +23,6 @@ iceberg.rest-catalog.uri=http://polaris:8181/api/catalog iceberg.rest-catalog.security=OAUTH2 iceberg.rest-catalog.oauth2.credential=root:s3cr3t iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL -iceberg.rest-catalog.warehouse=quickstart_catalog +iceberg.rest-catalog.warehouse=quickstart_catalog # Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration fs.hadoop.enabled=true From cba005d75531bf39db097c3a4daf6732a8e25367 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: 
Fri, 11 Apr 2025 17:03:26 -0700 Subject: [PATCH 11/21] Azure First Draft Complete --- .../assets/cloud_providers/deploy-azure.sh | 72 +++++++++++++++++++ .../trino-config/catalog/iceberg.properties | 2 +- .../unreleased/quickstart-deploy-aws.md | 3 +- .../unreleased/quickstart-deploy-azure.md | 48 +++++++++++++ .../unreleased/quickstart-installation.md | 2 + 5 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 getting-started/assets/cloud_providers/deploy-azure.sh create mode 100644 site/content/in-dev/unreleased/quickstart-deploy-azure.md diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh new file mode 100644 index 0000000000..0f31d59c6f --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -0,0 +1,72 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +#EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id) +# +#DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ +# --instance-ids $EC2_INSTANCE_ID \ +# --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \ +# --output json) +# +#CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") +# +#CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') +# +#ALL_SUBNETS=$(aws ec2 describe-subnets \ +# --region $CURRENT_REGION \ +# --query 'Subnets[*].{SubnetId:SubnetId}' \ +# --output json \ +# | jq -r '[.[]["SubnetId"]] | join(" ")') +# +# +#aws rds create-db-subnet-group \ +# --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ +# --db-subnet-group-description "Apache Polaris Quickstart DB Subnet Group" \ +# --subnet-ids $ALL_SUBNETS +# +#DB_INSTANCE_INFO=$(aws rds create-db-instance \ +# --db-instance-identifier polaris-backend-test-$RANDOM_SUFFIX \ +# --db-instance-class db.t3.micro \ +# --engine postgres \ +# --master-username postgres \ +# --master-user-password postgres \ +# --db-name POLARIS \ +# --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ +# --allocated-storage 10) + +CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location') +CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName') +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) + +CREATE_DB_RESPONSE=$(az postgres flexible-server create -l $CURRENT_REGION -g $CURRENT_RESOURCE_GROUP -n polaris-backend-test-$RANDOM_SUFFIX -u postgres -p postgres -y) + +az postgres flexible-server db create -g $CURRENT_RESOURCE_GROUP -s polaris-backend-test-$RANDOM_SUFFIX -d POLARIS + +POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host') + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 
's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/getting-started/eclipselink/trino-config/catalog/iceberg.properties b/getting-started/eclipselink/trino-config/catalog/iceberg.properties index 057c9950f2..28c3c61faa 100644 --- a/getting-started/eclipselink/trino-config/catalog/iceberg.properties +++ b/getting-started/eclipselink/trino-config/catalog/iceberg.properties @@ -23,6 +23,6 @@ iceberg.rest-catalog.uri=http://polaris:8181/api/catalog iceberg.rest-catalog.security=OAUTH2 iceberg.rest-catalog.oauth2.credential=root:s3cr3t iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL -iceberg.rest-catalog.warehouse=quickstart_catalog +iceberg.rest-catalog.warehouse=quickstart_catalog # Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration fs.hadoop.enabled=true diff --git a/site/content/in-dev/unreleased/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/quickstart-deploy-aws.md index 56bbb35e9c..7ab14c6d70 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/quickstart-deploy-aws.md @@ -34,7 +34,8 @@ The requirements to run the script below are: * "rds:CreateDBSubnetGroup" ```shell - +chmod +x getting-started/assets/cloud_providers/deploy-aws.sh +./getting-started/assets/cloud_providers/deploy-aws.sh ``` Also, set the following static credentials for interacting with the Polaris server in the following exercises: diff --git 
a/site/content/in-dev/unreleased/quickstart-deploy-azure.md b/site/content/in-dev/unreleased/quickstart-deploy-azure.md new file mode 100644 index 0000000000..e4eacd637f --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-deploy-azure.md @@ -0,0 +1,48 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Deploying Polaris on AWS +type: docs +weight: 112 +--- + +Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start an [Azure Database for PostgreSQL - Flexible Server])(https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/overview) instance, which will be used as the backend Postgres instance holding all Polaris data. +Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino. + +The requirements to run the script below are: +* Install the AZ CLI, if it is not already installed on the Azure VM. Instructions to download the AZ CLI can be found [here](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). +* You must be logged into the AZ CLI. Please run `az account show` to ensure that you are logged in prior to running this script. 
+ +```shell +chmod +x getting-started/assets/cloud_providers/deploy-aws.sh +./getting-started/assets/cloud_providers/deploy-aws.sh +``` + +Also, set the following static credentials for interacting with the Polaris server in the following exercises: + +```shell +export CLIENT_ID=root +export CLIENT_SECRET=s3cr3t +``` + +To take down the Polaris server, run the following commands: + +```shell +docker compose -f getting-started/eclipselink/docker-compose.yml down +``` \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-installation.md b/site/content/in-dev/unreleased/quickstart-installation.md index 1dd60f6733..1ffcc1603c 100644 --- a/site/content/in-dev/unreleased/quickstart-installation.md +++ b/site/content/in-dev/unreleased/quickstart-installation.md @@ -111,6 +111,8 @@ jenv add $(brew --prefix openjdk@21) jenv local 21 ``` +Ensure that `java --version` and `javac` both return non-zero responses. + #### jq Most Polaris Quickstart scripts require `jq`. Follow the instructions from the [jq](https://jqlang.org/download/) website to download this tool. \ No newline at end of file From e805d28ddef701c83eb86fe93ced2d8e0db70880 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Fri, 11 Apr 2025 17:04:58 -0700 Subject: [PATCH 12/21] Azure First Draft, pt. 2 --- .../assets/cloud_providers/deploy-azure.sh | 33 ------------------- .../unreleased/quickstart-deploy-azure.md | 4 +-- 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index 0f31d59c6f..ee1761c6f6 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -17,39 +17,6 @@ # under the License. 
# -#EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id) -# -#DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ -# --instance-ids $EC2_INSTANCE_ID \ -# --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \ -# --output json) -# -#CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") -# -#CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') -# -#ALL_SUBNETS=$(aws ec2 describe-subnets \ -# --region $CURRENT_REGION \ -# --query 'Subnets[*].{SubnetId:SubnetId}' \ -# --output json \ -# | jq -r '[.[]["SubnetId"]] | join(" ")') -# -# -#aws rds create-db-subnet-group \ -# --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ -# --db-subnet-group-description "Apache Polaris Quickstart DB Subnet Group" \ -# --subnet-ids $ALL_SUBNETS -# -#DB_INSTANCE_INFO=$(aws rds create-db-instance \ -# --db-instance-identifier polaris-backend-test-$RANDOM_SUFFIX \ -# --db-instance-class db.t3.micro \ -# --engine postgres \ -# --master-username postgres \ -# --master-user-password postgres \ -# --db-name POLARIS \ -# --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ -# --allocated-storage 10) - CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location') CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName') RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) diff --git a/site/content/in-dev/unreleased/quickstart-deploy-azure.md b/site/content/in-dev/unreleased/quickstart-deploy-azure.md index e4eacd637f..3a7153d811 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-azure.md +++ b/site/content/in-dev/unreleased/quickstart-deploy-azure.md @@ -30,8 +30,8 @@ The requirements to run the script below are: * You must be logged into the AZ CLI. 
Please run `az account show` to ensure that you are logged in prior to running this script. ```shell -chmod +x getting-started/assets/cloud_providers/deploy-aws.sh -./getting-started/assets/cloud_providers/deploy-aws.sh +chmod +x getting-started/assets/cloud_providers/deploy-azure.sh +./getting-started/assets/cloud_providers/deploy-azure.sh ``` Also, set the following static credentials for interacting with the Polaris server in the following exercises: From 2e824b91e06da9e5c2d38616a9aae610e789a8e3 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Fri, 11 Apr 2025 17:42:55 -0700 Subject: [PATCH 13/21] Azure Completed --- getting-started/eclipselink/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index b0f31e3e6a..4861675991 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -43,6 +43,7 @@ services: interval: 2s timeout: 10s retries: 10 + start_period: 10s polaris-setup: image: alpine/curl From e62ce3320b48842cbb8e8cb8871e9ba08aff9378 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Sun, 13 Apr 2025 18:07:05 -0700 Subject: [PATCH 14/21] GCP First Draft --- .../assets/cloud_providers/deploy-aws.sh | 8 ++-- .../assets/cloud_providers/deploy-azure.sh | 5 +- .../assets/cloud_providers/deploy-gcp.sh | 47 ++++++++++++++++++ .../unreleased/quickstart-deploy-gcp.md | 48 +++++++++++++++++++ 4 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 getting-started/assets/cloud_providers/deploy-gcp.sh create mode 100644 site/content/in-dev/unreleased/quickstart-deploy-gcp.md diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 5dfcc26dce..c943eb69b8 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -35,20 +35,22 @@ ALL_SUBNETS=$(aws ec2 
describe-subnets \ | jq -r '[.[]["SubnetId"]] | join(" ")') RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8) +SUBNET_GROUP_NAME="polaris-db-subnet-group-$RANDOM_SUFFIX" +INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" aws rds create-db-subnet-group \ - --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ + --db-subnet-group-name $SUBNET_GROUP_NAME \ --db-subnet-group-description "Apache Polaris Quickstart DB Subnet Group" \ --subnet-ids $ALL_SUBNETS DB_INSTANCE_INFO=$(aws rds create-db-instance \ - --db-instance-identifier polaris-backend-test-$RANDOM_SUFFIX \ + --db-instance-identifier $INSTANCE_NAME \ --db-instance-class db.t3.micro \ --engine postgres \ --master-username postgres \ --master-user-password postgres \ --db-name POLARIS \ - --db-subnet-group-name polaris-db-subnet-group-$RANDOM_SUFFIX \ + --db-subnet-group-name $SUBNET_GROUP_NAME \ --allocated-storage 10) DB_ARN=$(echo $DB_INSTANCE_INFO | jq -r '.["DBInstance"]["DBInstanceArn"]') diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index ee1761c6f6..76ee85432b 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -20,10 +20,11 @@ CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location') CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName') RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) +INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" -CREATE_DB_RESPONSE=$(az postgres flexible-server create -l $CURRENT_REGION -g $CURRENT_RESOURCE_GROUP -n polaris-backend-test-$RANDOM_SUFFIX -u postgres -p postgres -y) +CREATE_DB_RESPONSE=$(az postgres flexible-server create -l $CURRENT_REGION -g $CURRENT_RESOURCE_GROUP -n $INSTANCE_NAME -u 
postgres -p postgres -y) -az postgres flexible-server db create -g $CURRENT_RESOURCE_GROUP -s polaris-backend-test-$RANDOM_SUFFIX -d POLARIS +az postgres flexible-server db create -g $CURRENT_RESOURCE_GROUP -s $INSTANCE_NAME -d POLARIS POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host') diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh new file mode 100644 index 0000000000..de559247cc --- /dev/null +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +CURRENT_REGION=$(curl -H "Metadata-Flavor: Google" \ + "http://169.254.169.254/computeMetadata/v1/instance/zone" \ + | awk -F/ '{print $NF}' | sed 's/-[a-z]$//') +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) +INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" + +gcloud sql instances create $INSTANCE_NAME \ + --database-version=POSTGRES_17 \ + --region=$CURRENT_REGION \ + --tier=db-perf-optimized-N-4 \ + --edition=ENTERPRISE_PLUS \ + --root-password=postgres + + +gcloud sql databases create POLARIS --instance=$INSTANCE_NAME + +POSTGRES_ADDR=$(gcloud sql instances describe $INSTANCE_NAME --format="get(ipAddresses[0].ipAddress)") + +FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') +sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" + +./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ + -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ + -Dquarkus.container-image.tag=postgres-latest \ + -Dquarkus.container-image.build=true \ + --no-build-cache + +docker compose -f getting-started/eclipselink/docker-compose-bootstrap-db.yml -f getting-started/eclipselink/docker-compose.yml up -d \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-deploy-gcp.md b/site/content/in-dev/unreleased/quickstart-deploy-gcp.md new file mode 100644 index 0000000000..5fae5f6a19 --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart-deploy-gcp.md @@ -0,0 +1,48 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +Title: Deploying Polaris on AWS +type: docs +weight: 112 +--- + +Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start a [Cloud SQL for PostgreSQL])(https://cloud.google.com/sql/docs/postgres) instance, which will be used as the backend Postgres instance holding all Polaris data. +Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino. + +The requirements to run the script below are: +* Install the `gcloud` CLI, if it is not already installed on the GCP VM. Instructions to download the `gcloud` CLI can be found [here](https://cloud.google.com/sdk/docs/install). +* Ensure the `Cloud SQL Admin API` has been enabled in your project and that your VM's Principal has access to the correct role: `roles/cloudsql.admin`. 
+ +```shell +chmod +x getting-started/assets/cloud_providers/deploy-gcp.sh +./getting-started/assets/cloud_providers/deploy-gcp.sh +``` + +Also, set the following static credentials for interacting with the Polaris server in the following exercises: + +```shell +export CLIENT_ID=root +export CLIENT_SECRET=s3cr3t +``` + +To take down the Polaris server, run the following commands: + +```shell +docker compose -f getting-started/eclipselink/docker-compose.yml down +``` \ No newline at end of file From 9a2de0d18f64c2b02a91cda229632c19a56b3530 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 14 Apr 2025 13:12:55 -0700 Subject: [PATCH 15/21] GCP Verified --- .../assets/cloud_providers/deploy-gcp.sh | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh index de559247cc..5da93b5585 100644 --- a/getting-started/assets/cloud_providers/deploy-gcp.sh +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -17,23 +17,26 @@ # under the License. 
# -CURRENT_REGION=$(curl -H "Metadata-Flavor: Google" \ - "http://169.254.169.254/computeMetadata/v1/instance/zone" \ - | awk -F/ '{print $NF}' | sed 's/-[a-z]$//') +CURRENT_ZONE=$(curl -H "Metadata-Flavor: Google" "http://169.254.169.254/computeMetadata/v1/instance/zone" | awk -F/ '{print $NF}') +CURRENT_REGION=$(echo $CURRENT_ZONE | sed 's/-[a-z]$//') +VM_INSTANCE_NAME=$(curl -H "Metadata-Flavor: Google" "http://169.254.169.254/computeMetadata/v1/instance/name") RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) -INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" +DB_INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" -gcloud sql instances create $INSTANCE_NAME \ +INSTANCE_IP=$(gcloud compute instances describe $VM_INSTANCE_NAME --zone=$CURRENT_ZONE --format="get(networkInterfaces[0].accessConfigs[0].natIP)") + + +gcloud sql instances create $DB_INSTANCE_NAME \ --database-version=POSTGRES_17 \ --region=$CURRENT_REGION \ --tier=db-perf-optimized-N-4 \ --edition=ENTERPRISE_PLUS \ - --root-password=postgres - + --root-password=postgres \ + --authorized-networks="$INSTANCE_IP/32" -gcloud sql databases create POLARIS --instance=$INSTANCE_NAME +gcloud sql databases create POLARIS --instance=$DB_INSTANCE_NAME -POSTGRES_ADDR=$(gcloud sql instances describe $INSTANCE_NAME --format="get(ipAddresses[0].ipAddress)") +POSTGRES_ADDR=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="get(ipAddresses[0].ipAddress)") FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" From 76f33b14720365e96167911948d2987dbf590b70 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 14 Apr 2025 15:27:57 -0700 Subject: [PATCH 16/21] File structure fixed --- .../in-dev/unreleased/quickstart/_index.md | 27 +++++++++++++++++++ .../deploying-polaris/_index.md} | 0 
.../quickstart-deploy-aws.md | 4 +-- .../quickstart-deploy-azure.md | 4 +-- .../quickstart-deploy-gcp.md | 4 +-- .../quickstart-local-deployment.md | 8 +++--- .../install-dependencies.md} | 18 ++++++------- .../using-polaris.md} | 16 +++++------ 8 files changed, 54 insertions(+), 27 deletions(-) create mode 100644 site/content/in-dev/unreleased/quickstart/_index.md rename site/content/in-dev/unreleased/{quickstart-deployment-parent.md => quickstart/deploying-polaris/_index.md} (100%) rename site/content/in-dev/unreleased/{ => quickstart/deploying-polaris}/quickstart-deploy-aws.md (90%) rename site/content/in-dev/unreleased/{ => quickstart/deploying-polaris}/quickstart-deploy-azure.md (88%) rename site/content/in-dev/unreleased/{ => quickstart/deploying-polaris}/quickstart-deploy-gcp.md (89%) rename site/content/in-dev/unreleased/{ => quickstart/deploying-polaris}/quickstart-local-deployment.md (92%) rename site/content/in-dev/unreleased/{quickstart-installation.md => quickstart/install-dependencies.md} (96%) rename site/content/in-dev/unreleased/{quickstart-using-polaris.md => quickstart/using-polaris.md} (88%) diff --git a/site/content/in-dev/unreleased/quickstart/_index.md b/site/content/in-dev/unreleased/quickstart/_index.md new file mode 100644 index 0000000000..bf87ac0540 --- /dev/null +++ b/site/content/in-dev/unreleased/quickstart/_index.md @@ -0,0 +1,27 @@ +--- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#linkTitle: 'Quick Start' +title: 'Quick Start' +type: docs +weight: 100 +build: +# list: never + render: never +--- \ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-deployment-parent.md b/site/content/in-dev/unreleased/quickstart/deploying-polaris/_index.md similarity index 100% rename from site/content/in-dev/unreleased/quickstart-deployment-parent.md rename to site/content/in-dev/unreleased/quickstart/deploying-polaris/_index.md diff --git a/site/content/in-dev/unreleased/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-aws.md similarity index 90% rename from site/content/in-dev/unreleased/quickstart-deploy-aws.md rename to site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-aws.md index 7ab14c6d70..267ec26f59 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-aws.md @@ -17,12 +17,12 @@ # specific language governing permissions and limitations # under the License. # -Title: Deploying Polaris on AWS +Title: Deploying Polaris on Amazon Web Services (AWS) type: docs weight: 112 --- -Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start an [Amazon RDS for PostgreSQL])(https://aws.amazon.com/rds/postgresql/) instance, which will be used as the backend Postgres instance holding all Polaris data. 
+Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start an [Amazon RDS for PostgreSQL](https://aws.amazon.com/rds/postgresql/) instance, which will be used as the backend Postgres instance holding all Polaris data. Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino. The requirements to run the script below are: diff --git a/site/content/in-dev/unreleased/quickstart-deploy-azure.md b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-azure.md similarity index 88% rename from site/content/in-dev/unreleased/quickstart-deploy-azure.md rename to site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-azure.md index 3a7153d811..f2b6b6c72b 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-azure.md +++ b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-azure.md @@ -17,12 +17,12 @@ # specific language governing permissions and limitations # under the License. # -Title: Deploying Polaris on AWS +Title: Deploying Polaris on Azure type: docs weight: 112 --- -Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start an [Azure Database for PostgreSQL - Flexible Server])(https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/overview) instance, which will be used as the backend Postgres instance holding all Polaris data. +Build and launch Polaris using the Azure Startup Script at the location provided in the command below. This script will start an [Azure Database for PostgreSQL - Flexible Server](https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/overview) instance, which will be used as the backend Postgres instance holding all Polaris data.
Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino. The requirements to run the script below are: diff --git a/site/content/in-dev/unreleased/quickstart-deploy-gcp.md b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-gcp.md similarity index 89% rename from site/content/in-dev/unreleased/quickstart-deploy-gcp.md rename to site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-gcp.md index 5fae5f6a19..253b3ffd90 100644 --- a/site/content/in-dev/unreleased/quickstart-deploy-gcp.md +++ b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-deploy-gcp.md @@ -17,12 +17,12 @@ # specific language governing permissions and limitations # under the License. # -Title: Deploying Polaris on AWS +Title: Deploying Polaris on Google Cloud Platform (GCP) type: docs weight: 112 --- -Build and launch Polaris using the AWS Startup Script at the location provided in the command below. This script will start a [Cloud SQL for PostgreSQL])(https://cloud.google.com/sql/docs/postgres) instance, which will be used as the backend Postgres instance holding all Polaris data. +Build and launch Polaris using the GCP Startup Script at the location provided in the command below. This script will start a [Cloud SQL for PostgreSQL](https://cloud.google.com/sql/docs/postgres) instance, which will be used as the backend Postgres instance holding all Polaris data. Additionally, Polaris will be bootstrapped to use this database and Docker containers will be spun up for Spark SQL and Trino.
The requirements to run the script below are: diff --git a/site/content/in-dev/unreleased/quickstart-local-deployment.md b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-local-deployment.md similarity index 92% rename from site/content/in-dev/unreleased/quickstart-local-deployment.md rename to site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-local-deployment.md index 811eb44d10..1bed23a514 100644 --- a/site/content/in-dev/unreleased/quickstart-local-deployment.md +++ b/site/content/in-dev/unreleased/quickstart/deploying-polaris/quickstart-local-deployment.md @@ -66,7 +66,7 @@ cd ~/polaris # Build the server ./gradlew clean :polaris-quarkus-server:assemble # Start the server -java -jar quarkus/server/build/quarkus-app/quarkus-run.jar +./gradlew run ``` You should see output for some time as Polaris builds and starts up. Eventually, you won’t see any more logs and should see messages that resemble the following: @@ -80,7 +80,7 @@ INFO [io.quarkus] [,] [,,,] (Quarkus Main Thread) Installed features: [...] At this point, Polaris is running. -For this tutorial, we'll launch an instance of Polaris that stores entities only in-memory. This means that any entities that you define will be destroyed when Polaris is shut down. It also means that Polaris will automatically bootstrap itself with root credentials. For more information on how to configure Polaris for production usage, see the [docs]({{% ref "configuring-polaris-for-production" %}}). +For this tutorial, we'll launch an instance of Polaris that stores entities only in-memory. This means that any entities that you define will be destroyed when Polaris is shut down. It also means that Polaris will automatically bootstrap itself with root credentials. For more information on how to configure Polaris for production usage, see the [docs]({{% relref "../../configuring-polaris-for-production" %}}). 
When Polaris is launched using an in-memory metastore, the root principal credentials can be found in stdout on initial startup. Look for a line that resembles the following: @@ -100,7 +100,7 @@ export CLIENT_SECRET= #### Apache Spark -If you want to connect to Polaris with [Apache Spark](https://spark.apache.org/), you'll need to start by cloning Spark. As in the [prerequisites]({{% ref "quickstart-installation#Prerequisites" %}}), make sure [git](https://git-scm.com/) is installed first. +If you want to connect to Polaris with [Apache Spark](https://spark.apache.org/), you'll need to start by cloning Spark. As in the [prerequisites]({{% ref "_index#git" %}}), make sure [git](https://git-scm.com/) is installed first. Then, clone Spark and check out a versioned branch. This guide uses [Spark 3.5](https://spark.apache.org/releases/spark-release-3-5-0.html). @@ -112,7 +112,7 @@ git checkout branch-3.5 ``` #### Trino -If you want to connect to Polaris with [Trino](https://trino.io/), it is recommended to set up Trino using Docker. As in the [prerequisites]({{% ref "quickstart-installation#Prerequisites" %}}), make sure [Docker](https://www.docker.com/) is installed first +If you want to connect to Polaris with [Trino](https://trino.io/), it is recommended to set up Trino using Docker. 
As in the [prerequisites]({{% ref "_index#docker" %}}), make sure [Docker](https://www.docker.com/) is installed first ```shell docker run --name trino -d -p 8080:8080 trinodb/trino diff --git a/site/content/in-dev/unreleased/quickstart-installation.md b/site/content/in-dev/unreleased/quickstart/install-dependencies.md similarity index 96% rename from site/content/in-dev/unreleased/quickstart-installation.md rename to site/content/in-dev/unreleased/quickstart/install-dependencies.md index 1ffcc1603c..9e3a0aabfc 100644 --- a/site/content/in-dev/unreleased/quickstart-installation.md +++ b/site/content/in-dev/unreleased/quickstart/install-dependencies.md @@ -17,18 +17,18 @@ # specific language governing permissions and limitations # under the License. # -Title: Quick Start +Title: Installing Dependencies type: docs weight: 100 --- This guide serves as an introduction to several key entities that can be managed with Apache Polaris (Incubating), describes how to build and deploy Polaris locally, and finally includes examples of how to use Polaris with Apache Spark™. -## Prerequisites +# Prerequisites This guide covers building Polaris, deploying it locally or via [Docker](https://www.docker.com/), and interacting with it using the command-line interface and [Apache Spark](https://spark.apache.org/). Before proceeding with Polaris, be sure to satisfy the relevant prerequisites listed here. -### Git +## Git To get the latest Polaris code, you'll need to clone the repository using [git](https://git-scm.com/). You can install git using [homebrew](https://brew.sh/) on MacOS: @@ -45,13 +45,13 @@ cd ~ git clone https://github.com/apache/polaris.git ``` -### Docker +## Docker It is recommended to deploy Polaris inside [Docker](https://www.docker.com/) for the Quick Start workflow. Instructions for deploying the Quick Start workflow on the supported Cloud Providers (AWS, Azure, GCP) will be provided only with Docker. 
However, non-Docker deployment instructions for local deployments can also be followed on Cloud Providers. Instructions to install Docker can be found on the [Docker website](https://docs.docker.com/engine/install/). Ensure that Docker and the Docker Compose plugin are both installed. -#### Docker on MacOS +### Docker on MacOS Docker can be installed using [homebrew](https://brew.sh/): ```shell @@ -66,7 +66,7 @@ docker run --security-opt seccomp=unconfined apache/polaris:latest Note: Setting the seccomp profile to "unconfined" disables the default system call filtering, which may pose security risks. Use this configuration with caution, especially in production environments. -#### Docker on Amazon Linux +### Docker on Amazon Linux Docker can be installed using a modification to the CentOS instructions. For example: ```shell @@ -83,7 +83,7 @@ sudo sed -i 's/$releasever/9/g' /etc/yum.repos.d/docker-ce.repo sudo dnf -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin ``` -#### Confirm Docker Installation +### Confirm Docker Installation Once installed, make sure that both Docker and the Docker Compose plugin are installed: @@ -98,7 +98,7 @@ Also make sure Docker is running and is able to run a sample Docker container: docker run hello-world ``` -#### Java +## Java If you plan to build Polaris from source yourself or using this tutorial's instructions on a Cloud Provider, you will need to satisfy a few prerequisites first. @@ -113,6 +113,6 @@ jenv local 21 Ensure that `java --version` and `javac` both return non-zero responses. -#### jq +## jq Most Polaris Quickstart scripts require `jq`. Follow the instructions from the [jq](https://jqlang.org/download/) website to download this tool. 
\ No newline at end of file diff --git a/site/content/in-dev/unreleased/quickstart-using-polaris.md b/site/content/in-dev/unreleased/quickstart/using-polaris.md similarity index 88% rename from site/content/in-dev/unreleased/quickstart-using-polaris.md rename to site/content/in-dev/unreleased/quickstart/using-polaris.md index 4f7dea18b5..20feb91502 100644 --- a/site/content/in-dev/unreleased/quickstart-using-polaris.md +++ b/site/content/in-dev/unreleased/quickstart/using-polaris.md @@ -19,7 +19,7 @@ # Title: Using Polaris type: docs -weight: 120 +weight: 300 --- ### Connecting to Polaris @@ -28,7 +28,7 @@ Polaris is compatible with any [Apache Iceberg](https://iceberg.apache.org/) cli ## Defining a Catalog -In Polaris, the [catalog]({{% ref "entities#catalog" %}}) is the top-level entity that objects like [tables]({{% ref "entities#table" %}}) and [views]({{% ref "entities#view" %}}) are organized under. With a Polaris service running, you can create a catalog like so: +In Polaris, the [catalog]({{% relref "../entities#catalog" %}}) is the top-level entity that objects like [tables]({{% relref "../entities#table" %}}) and [views]({{% relref "../entities#view" %}}) are organized under. With a Polaris service running, you can create a catalog like so: ```shell cd ~/polaris @@ -48,14 +48,14 @@ This will create a new catalog called **quickstart_catalog**. The `DEFAULT_BASE_LOCATION` you provide will be the default location that objects in this catalog should be stored in, and the `ROLE_ARN` you provide should be a [Role ARN](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) with access to read and write data in that location. These credentials will be provided to engines reading data from the catalog once they have authenticated with Polaris using credentials that have access to those resources. -If you’re using a storage type other than S3, such as Azure, you’ll provide a different type of credential than a Role ARN. 
For more details on supported storage types, see the [docs]({{% ref "entities#storage-type" %}}). +If you’re using a storage type other than S3, such as Azure, you’ll provide a different type of credential than a Role ARN. For more details on supported storage types, see the [docs]({{% relref "../entities#storage-type" %}}). -Additionally, if Polaris is running somewhere other than `localhost:8181`, you can specify the correct hostname and port by providing `--host` and `--port` flags. For the full set of options supported by the CLI, please refer to the [docs]({{% ref "command-line-interface" %}}). +Additionally, if Polaris is running somewhere other than `localhost:8181`, you can specify the correct hostname and port by providing `--host` and `--port` flags. For the full set of options supported by the CLI, please refer to the [docs]({{% relref "../command-line-interface" %}}). ### Creating a Principal and Assigning it Privileges -With a catalog created, we can create a [principal]({{% ref "entities#principal" %}}) that has access to manage that catalog. For details on how to configure the Polaris CLI, see [the section above](#defining-a-catalog) or refer to the [docs]({{% ref "command-line-interface" %}}). +With a catalog created, we can create a [principal]({{% relref "../entities#principal" %}}) that has access to manage that catalog. For details on how to configure the Polaris CLI, see [the section above](#defining-a-catalog) or refer to the [docs]({{% relref "../command-line-interface" %}}). ```shell ./polaris \ @@ -90,7 +90,7 @@ When the `principals create` command completes successfully, it will return the {"clientId": "XXXX", "clientSecret": "YYYY"} ``` -Now, we grant the principal the [principal role]({{% ref "entities#principal-role" %}}) we created, and grant the [catalog role]({{% ref "entities#catalog-role" %}}) the principal role we created. For more information on these entities, please refer to the linked documentation. 
+Now, we grant the principal the [principal role]({{% relref "../entities#principal-role" %}}) we created, and grant the [catalog role]({{% relref "../entities#catalog-role" %}}) the principal role we created. For more information on these entities, please refer to the linked documentation. ```shell ./polaris \ @@ -115,7 +115,7 @@ Now, we’ve linked our principal to the catalog via roles like so: ![Principal to Catalog](/img/quickstart/privilege-illustration-1.png "Principal to Catalog") -In order to give this principal the ability to interact with the catalog, we must assign some [privileges]({{% ref "entities#privilege" %}}). For the time being, we will give this principal the ability to fully manage content in our new catalog. We can do this with the CLI like so: +In order to give this principal the ability to interact with the catalog, we must assign some [privileges]({{% relref "../entities#privilege" %}}). For the time being, we will give this principal the ability to fully manage content in our new catalog. 
We can do this with the CLI like so: ```shell ./polaris \ @@ -129,7 +129,7 @@ In order to give this principal the ability to interact with the catalog, we mus CATALOG_MANAGE_CONTENT ``` -This grants the [catalog privileges]({{% ref "entities#privilege" %}}) `CATALOG_MANAGE_CONTENT` to our catalog role, linking everything together like so: +This grants the [catalog privileges]({{% relref "../entities#privilege" %}}) `CATALOG_MANAGE_CONTENT` to our catalog role, linking everything together like so: ![Principal to Catalog with Catalog Role](/img/quickstart/privilege-illustration-2.png "Principal to Catalog with Catalog Role") From db282f2770ed8bf2b2d07726920579ae348086b0 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 14 Apr 2025 16:12:34 -0700 Subject: [PATCH 17/21] Remove Trino-specific tutorial --- getting-started/eclipselink/README.md | 18 ++++++ getting-started/trino/README.md | 58 ------------------ .../trino/create-polaris-catalog.sh | 61 ------------------- getting-started/trino/docker-compose.yml | 58 ------------------ .../trino-config/catalog/iceberg.properties | 28 --------- .../in-dev/unreleased/quickstart/_index.md | 2 - 6 files changed, 18 insertions(+), 207 deletions(-) delete mode 100644 getting-started/trino/README.md delete mode 100644 getting-started/trino/create-polaris-catalog.sh delete mode 100644 getting-started/trino/docker-compose.yml delete mode 100644 getting-started/trino/trino-config/catalog/iceberg.properties diff --git a/getting-started/eclipselink/README.md b/getting-started/eclipselink/README.md index 451f9cbe74..1170a28558 100644 --- a/getting-started/eclipselink/README.md +++ b/getting-started/eclipselink/README.md @@ -70,3 +70,21 @@ This example requires `jq` to be installed on your machine. curl -v http://127.0.0.1:8181/api/management/v1/principal-roles -H "Authorization: Bearer $POLARIS_TOKEN" curl -v http://127.0.0.1:8181/api/management/v1/catalogs/polaris_demo -H "Authorization: Bearer $POLARIS_TOKEN" ``` + +6. 
Using Trino CLI: To access the Trino CLI, run this command: +``` +docker exec -it eclipselink-trino-1 trino +``` +Note, `eclipselink-trino-1` is the name of the Docker container. + +Example Trino queries: +``` +SHOW CATALOGS; +SHOW SCHEMAS FROM iceberg; +SHOW TABLES FROM iceberg.information_schema; +DESCRIBE iceberg.information_schema.tables; + +CREATE SCHEMA iceberg.tpch; +CREATE TABLE iceberg.tpch.test_polaris AS SELECT 1 x; +SELECT * FROM iceberg.tpch.test_polaris; +``` diff --git a/getting-started/trino/README.md b/getting-started/trino/README.md deleted file mode 100644 index ec1f4ecd11..0000000000 --- a/getting-started/trino/README.md +++ /dev/null @@ -1,58 +0,0 @@ - - -# Getting Started with Trino and Apache Polaris - -This getting started guide provides a `docker-compose` file to set up [Trino](https://trino.io/) with Apache Polaris. Apache Polaris is configured as an Iceberg REST Catalog in Trino. - -## Build Polaris Image -Build Polaris Image while Docker is running -``` -./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache -``` - -## Run the `docker-compose` file -To start the `docker-compose` file, run this command from the repo's root directory: -``` -docker-compose -f getting-started/trino/docker-compose.yml up -``` - -## Run Trino queries via Trino CLI -To access the Trino CLI, run this command: -``` -docker exec -it trino-trino-1 trino -``` -Note, `trino-trino-1` is the name of the Docker container. - -Example Trino queries: -``` -SHOW CATALOGS; -SHOW SCHEMAS FROM iceberg; -SHOW TABLES FROM iceberg.information_schema; -DESCRIBE iceberg.information_schema.tables; - -CREATE SCHEMA iceberg.tpch; -CREATE TABLE iceberg.tpch.test_polaris AS SELECT 1 x; -SELECT * FROM iceberg.tpch.test_polaris; -``` - -## Note -The Polaris in this example is started with realm `default-realm` and root credentials: `root:s3cr3t`. - -An example catalog is created in Apache Polaris using the `curl` command. 
See `create-polaris-catalog.sh` for details. diff --git a/getting-started/trino/create-polaris-catalog.sh b/getting-started/trino/create-polaris-catalog.sh deleted file mode 100644 index e08e1c5f88..0000000000 --- a/getting-started/trino/create-polaris-catalog.sh +++ /dev/null @@ -1,61 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -if ! 
output=$(curl -X POST -H "Polaris-Realm: default-realm" "http://polaris:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=s3cr3t" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi - -token=$(echo "$output" | awk -F\" '{print $4}') - -if [ "$token" == "unauthorized_client" ]; then - logred "Error: Failed to retrieve bearer token" - exit 1 -fi - -PRINCIPAL_TOKEN=$token - -# Use local filesystem by default -curl -i -X POST -H "Authorization: Bearer $PRINCIPAL_TOKEN" -H 'Accept: application/json' -H 'Content-Type: application/json' \ - http://polaris:8181/api/management/v1/catalogs \ - -d '{ - "catalog": { - "name": "polaris", - "type": "INTERNAL", - "readOnly": false, - "properties": { - "default-base-location": "file:///tmp/polaris/" - }, - "storageConfigInfo": { - "storageType": "FILE", - "allowedLocations": [ - "file:///tmp" - ] - } - } - }' - -# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata -curl -i -X PUT -H "Authorization: Bearer $PRINCIPAL_TOKEN" -H 'Accept: application/json' -H 'Content-Type: application/json' \ - http://polaris:8181/api/management/v1/catalogs/polaris/catalog-roles/catalog_admin/grants \ - -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' diff --git a/getting-started/trino/docker-compose.yml b/getting-started/trino/docker-compose.yml deleted file mode 100644 index fd438f0094..0000000000 --- a/getting-started/trino/docker-compose.yml +++ /dev/null @@ -1,58 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -services: - polaris: - image: apache/polaris:latest - ports: - - "8181:8181" - - "8182" - environment: - AWS_REGION: us-west-2 - AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY - GOOGLE_APPLICATION_CREDENTIALS: $GOOGLE_APPLICATION_CREDENTIALS - AZURE_TENANT_ID: $AZURE_TENANT_ID - AZURE_CLIENT_ID: $AZURE_CLIENT_ID - AZURE_CLIENT_SECRET: $AZURE_CLIENT_SECRET - POLARIS_BOOTSTRAP_CREDENTIALS: default-realm,root,s3cr3t - polaris.realm-context.realms: default-realm - quarkus.otel.sdk.disabled: "true" - - healthcheck: - test: ["CMD", "curl", "http://localhost:8182/healthcheck"] - interval: 10s - timeout: 10s - retries: 5 - - create-polaris-catalog: - image: curlimages/curl - depends_on: - polaris: - condition: service_healthy - volumes: - - ./create-polaris-catalog.sh:/create-polaris-catalog.sh - command: ["/bin/sh", "/create-polaris-catalog.sh"] - - trino: - image: trinodb/trino:latest - ports: - - "8080:8080" - volumes: - - ./trino-config/catalog:/etc/trino/catalog diff --git a/getting-started/trino/trino-config/catalog/iceberg.properties b/getting-started/trino/trino-config/catalog/iceberg.properties deleted file mode 100644 index 1cd0a0e7a2..0000000000 --- a/getting-started/trino/trino-config/catalog/iceberg.properties +++ /dev/null @@ -1,28 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -connector.name=iceberg -iceberg.catalog.type=rest -iceberg.rest-catalog.uri=http://polaris:8181/api/catalog -iceberg.rest-catalog.security=OAUTH2 -iceberg.rest-catalog.oauth2.credential=root:s3cr3t -iceberg.rest-catalog.oauth2.scope=PRINCIPAL_ROLE:ALL -iceberg.rest-catalog.warehouse=polaris -# Required to support local filesystem: https://trino.io/docs/current/object-storage.html#configuration -fs.hadoop.enabled=true diff --git a/site/content/in-dev/unreleased/quickstart/_index.md b/site/content/in-dev/unreleased/quickstart/_index.md index bf87ac0540..b8a84025c6 100644 --- a/site/content/in-dev/unreleased/quickstart/_index.md +++ b/site/content/in-dev/unreleased/quickstart/_index.md @@ -17,11 +17,9 @@ # specific language governing permissions and limitations # under the License. 
# -#linkTitle: 'Quick Start' title: 'Quick Start' type: docs weight: 100 build: -# list: never render: never --- \ No newline at end of file From ac700ce4bb7f57b353e251d72b65451b0930b50a Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Tue, 15 Apr 2025 04:17:28 -0700 Subject: [PATCH 18/21] Restructured Quick Start --- site/content/in-dev/unreleased/_index.md | 2 +- .../{quickstart => getting-started}/_index.md | 4 ++-- .../deploying-polaris/_index.md | 4 ++-- .../quickstart-deploy-aws.md | 0 .../quickstart-deploy-azure.md | 0 .../quickstart-deploy-gcp.md | 0 .../install-dependencies.md | 0 .../quickstart.md} | 20 ++++++++++--------- .../using-polaris.md | 10 +++------- 9 files changed, 19 insertions(+), 21 deletions(-) rename site/content/in-dev/unreleased/{quickstart => getting-started}/_index.md (95%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/deploying-polaris/_index.md (95%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/deploying-polaris/quickstart-deploy-aws.md (100%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/deploying-polaris/quickstart-deploy-azure.md (100%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/deploying-polaris/quickstart-deploy-gcp.md (100%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/install-dependencies.md (100%) rename site/content/in-dev/unreleased/{quickstart/deploying-polaris/quickstart-local-deployment.md => getting-started/quickstart.md} (80%) rename site/content/in-dev/unreleased/{quickstart => getting-started}/using-polaris.md (96%) diff --git a/site/content/in-dev/unreleased/_index.md b/site/content/in-dev/unreleased/_index.md index 19100e7123..12981ffe52 100644 --- a/site/content/in-dev/unreleased/_index.md +++ b/site/content/in-dev/unreleased/_index.md @@ -37,7 +37,7 @@ These pages refer to the current state of the main branch, which is still under Functionalities can be changed, removed or added without 
prior notice. {{< /alert >}} -Check out the [Quick Start]({{% ref "quickstart" %}}) page to get started. +Check out the [Quick Start]({{% ref "getting-started" %}}) page to get started.