From cb1d2bacda37212ce42960691a591b84cb29b679 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 16:27:57 -0700 Subject: [PATCH 01/21] AWS First Draft --- .../assets/cloud_providers/deploy-aws.sh | 10 ++++-- .../assets/polaris/create-catalog.sh | 35 ++++++++++++++----- .../eclipselink/docker-compose.yml | 4 +++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index c943eb69b8..6638bcf93e 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -21,12 +21,12 @@ EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id) DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ --instance-ids $EC2_INSTANCE_ID \ - --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \ + --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone,RoleArn:IamInstanceProfile.Arn}' \ --output json) CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") - CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') +export AWS_ROLE_ARN=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."RoleArn") ALL_SUBNETS=$(aws ec2 describe-subnets \ --region $CURRENT_REGION \ @@ -69,6 +69,12 @@ POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g') sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" +S3_BUCKET_NAME="polaris-test-s3-$RANDOM_SUFFIX" + +aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --create-bucket-configuration LocationConstraint=$CURRENT_REGION + +export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index b7b8e0f51d..3c8b38aeed 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -19,7 +19,7 @@ set -e -token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ +token=$(curl -s http://localhost:8181/api/catalog/v1/oauth/tokens \ --user root:s3cr3t \ -d grant_type=client_credentials \ -d scope=PRINCIPAL_ROLE:ALL | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') @@ -32,6 +32,30 @@ fi echo echo "Obtained access token: ${token}" +STORAGE_TYPE="FILE" +if [ -z "${STORAGE_LOCATION}" ]; then + echo "STORAGE_LOCATION is not set, using FILE storage type" + STORAGE_LOCATION="file:///var/tmp/quickstart_catalog/" +else + echo "STORAGE_LOCATION is set to '$STORAGE_LOCATION'" + if [[ "$STORAGE_LOCATION" == s3* ]]; then + STORAGE_TYPE="S3" + elif [[ "$STORAGE_LOCATION" == gs* ]]; then + STORAGE_TYPE="GCS" + else + STORAGE_TYPE="AZURE" + fi + echo "Using StorageType: $STORAGE_TYPE" +fi + +STORAGE_CONFIG_INFO = '{"storageType": "$STORAGE_TYPE", "allowedLocations": ["$STORAGE_LOCATION"]}' + +if [[ "$STORAGE_TYPE" == "S3" ]]; then + STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $AWS_ROLE_ARN}') +elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then + STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: [$AZURE_TENANT_ID]}') +fi + echo echo Creating a catalog named quickstart_catalog... @@ -45,14 +69,9 @@ curl -s -H "Authorization: Bearer ${token}" \ "type": "INTERNAL", "readOnly": false, "properties": { - "default-base-location": "file:///var/tmp/quickstart_catalog/" + "default-base-location": "$STORAGE_LOCATION" }, - "storageConfigInfo": { - "storageType": "FILE", - "allowedLocations": [ - "file:///var/tmp" - ] - } + "storageConfigInfo": $STORAGE_CONFIG_INFO } }' diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 4861675991..3b49080c0f 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -50,6 +50,10 @@ services: depends_on: polaris: condition: service_healthy + environment: + - STORAGE_LOCATION=${STORAGE_LOCATION} + - AWS_ROLE_ARN=${AWS_ROLE_ARN} + - AZURE_TENANT_ID=${AZURE_TENANT_ID} volumes: - ../assets/polaris/:/polaris entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' From 4205ef4934e9702e95038c3b74d941e6c781d488 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 17:07:31 -0700 Subject: [PATCH 02/21] Debug --- getting-started/assets/cloud_providers/deploy-aws.sh | 1 + getting-started/assets/polaris/create-catalog.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 6638bcf93e..54f177d28d 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -70,6 +70,7 @@ FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" S3_BUCKET_NAME="polaris-test-s3-$RANDOM_SUFFIX" +echo "S3 Bucket Name: $S3_BUCKET_NAME" aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --create-bucket-configuration LocationConstraint=$CURRENT_REGION diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 3c8b38aeed..21a4fc0bd8 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -48,7 +48,7 @@ else echo "Using StorageType: $STORAGE_TYPE" fi -STORAGE_CONFIG_INFO = '{"storageType": "$STORAGE_TYPE", "allowedLocations": ["$STORAGE_LOCATION"]}' +STORAGE_CONFIG_INFO='{"storageType": "$STORAGE_TYPE", "allowedLocations": ["$STORAGE_LOCATION"]}' if [[ "$STORAGE_TYPE" == "S3" ]]; then STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $AWS_ROLE_ARN}') From 142f93c515881b0c3d525e0cb620365f573e9d87 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 17:15:30 -0700 Subject: [PATCH 03/21] revert typo --- getting-started/assets/polaris/create-catalog.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 21a4fc0bd8..893b0e1901 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -19,7 +19,7 @@ set -e -token=$(curl -s http://localhost:8181/api/catalog/v1/oauth/tokens \ +token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ --user root:s3cr3t \ -d grant_type=client_credentials \ -d scope=PRINCIPAL_ROLE:ALL | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') From 330461eaab9c2b4da1d88fac7ba3ed1f0a11617d Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 17:22:56 -0700 Subject: [PATCH 04/21] Add JQ to docker runtime --- getting-started/assets/polaris/create-catalog.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 893b0e1901..ad2c2457d4 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -19,6 +19,8 @@ set -e +apk add --no-cache jq + token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \ --user root:s3cr3t \ -d grant_type=client_credentials \ From 1fd5784bb6bf47fbfae384803e3e0664565205b1 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 17:33:29 -0700 Subject: [PATCH 05/21] Debug, pt2 --- getting-started/assets/polaris/create-catalog.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index ad2c2457d4..22b47e1a2b 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -53,9 +53,9 @@ fi STORAGE_CONFIG_INFO='{"storageType": "$STORAGE_TYPE", "allowedLocations": ["$STORAGE_LOCATION"]}' if [[ "$STORAGE_TYPE" == "S3" ]]; then - STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $AWS_ROLE_ARN}') + STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $roleArn}') elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then - STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: [$AZURE_TENANT_ID]}') + STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: [$tenantId]}') fi echo From 13a6ab65efac2dd93087bacfd5033f40ad065755 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 17:50:29 -0700 Subject: [PATCH 06/21] debug --- getting-started/assets/polaris/create-catalog.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 22b47e1a2b..a12173b76c 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -50,7 +50,7 @@ else echo "Using StorageType: $STORAGE_TYPE" fi -STORAGE_CONFIG_INFO='{"storageType": "$STORAGE_TYPE", "allowedLocations": ["$STORAGE_LOCATION"]}' +STORAGE_CONFIG_INFO="{\"storageType\": \"$STORAGE_TYPE\", \"allowedLocations\": [\"$STORAGE_LOCATION\"]}" if [[ "$STORAGE_TYPE" == "S3" ]]; then STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $roleArn}') From 588f07c594cf16b1489506595b489357f96348c8 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 18:19:46 -0700 Subject: [PATCH 07/21] debug --- .../assets/polaris/create-catalog.sh | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index a12173b76c..34183a5c68 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -61,21 +61,25 @@ fi echo echo Creating a catalog named quickstart_catalog... +PAYLOAD='{ + "catalog": { + "name": "quickstart_catalog", + "type": "INTERNAL", + "readOnly": false, + "properties": { + "default-base-location": "'$STORAGE_LOCATION'" + }, + "storageConfigInfo": '$STORAGE_CONFIG_INFO' + } + }' + +echo $PAYLOAD + curl -s -H "Authorization: Bearer ${token}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ http://polaris:8181/api/management/v1/catalogs \ - -d '{ - "catalog": { - "name": "quickstart_catalog", - "type": "INTERNAL", - "readOnly": false, - "properties": { - "default-base-location": "$STORAGE_LOCATION" - }, - "storageConfigInfo": $STORAGE_CONFIG_INFO - } - }' + -d "$PAYLOAD" echo echo Done. \ No newline at end of file From 4912bf2b4e61d6e508ec5330440eb50f8e3c8111 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 18:30:26 -0700 Subject: [PATCH 08/21] Allow Instance Profile Roles --- .../polaris/core/storage/aws/AwsStorageConfigurationInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java index b41e545a30..a6d71d9070 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java @@ -34,7 +34,7 @@ public class AwsStorageConfigurationInfo extends PolarisStorageConfigurationInfo // Technically, it should be ^arn:(aws|aws-cn|aws-us-gov):iam::(\d{12}):role/.+$, @JsonIgnore - public static final String ROLE_ARN_PATTERN = "^arn:(aws|aws-us-gov):iam::(\\d{12}):role/.+$"; + public static final String ROLE_ARN_PATTERN = "^arn:(aws|aws-us-gov):iam::(\\d{12}):(role|instance-profile)/.+$"; private static final Pattern ROLE_ARN_PATTERN_COMPILED = Pattern.compile(ROLE_ARN_PATTERN); From c4fc667986cf04dcd11b3a4f17e8197eca38ab7d Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 22:00:43 -0700 Subject: [PATCH 09/21] change random suffix --- getting-started/assets/cloud_providers/deploy-aws.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 54f177d28d..e6b3874467 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -34,7 +34,7 @@ ALL_SUBNETS=$(aws ec2 describe-subnets \ --output json \ | jq -r '[.[]["SubnetId"]] | join(" ")') -RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8) +RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) SUBNET_GROUP_NAME="polaris-db-subnet-group-$RANDOM_SUFFIX" INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" From 2fff5f0cb5244d5f731b59e4aace03391964fae7 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 22:42:10 -0700 Subject: [PATCH 10/21] change instance profile to regular IAM roles --- getting-started/assets/cloud_providers/deploy-aws.sh | 4 +++- .../polaris/core/storage/aws/AwsStorageConfigurationInfo.java | 2 +- .../deploying-polaris/quickstart-deploy-aws.md | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index e6b3874467..58dae62f59 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -26,7 +26,9 @@ DESCRIBE_INSTANCE=$(aws ec2 describe-instances \ CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC") CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//') -export AWS_ROLE_ARN=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."RoleArn") +RAW_ROLE_ARN=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."RoleArn") +export AWS_ROLE_ARN="${RAW_ROLE_ARN/instance-profile/role}" + ALL_SUBNETS=$(aws ec2 describe-subnets \ --region $CURRENT_REGION \ diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java index a6d71d9070..b41e545a30 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsStorageConfigurationInfo.java @@ -34,7 +34,7 @@ public class AwsStorageConfigurationInfo extends PolarisStorageConfigurationInfo // Technically, it should be ^arn:(aws|aws-cn|aws-us-gov):iam::(\d{12}):role/.+$, @JsonIgnore - public static final String ROLE_ARN_PATTERN = "^arn:(aws|aws-us-gov):iam::(\\d{12}):(role|instance-profile)/.+$"; + public static final String ROLE_ARN_PATTERN = "^arn:(aws|aws-us-gov):iam::(\\d{12}):role/.+$"; private static final Pattern ROLE_ARN_PATTERN_COMPILED = Pattern.compile(ROLE_ARN_PATTERN); diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md index 266cea1c0d..84a2c5d296 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md @@ -32,6 +32,7 @@ The requirements to run the script below are: * "rds:CreateDBInstance" * "rds:DescribeDBInstances" * "rds:CreateDBSubnetGroup" + * "sts:AssumeRole" on the same role as the Instance Profile role of the EC2 instance on which you are running this script. ```shell chmod +x getting-started/assets/cloud_providers/deploy-aws.sh From ba0ad89a4a786f8efe40ff643ec6daad37ff95ec Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Mon, 21 Apr 2025 23:08:12 -0700 Subject: [PATCH 11/21] AWS Final Draft --- .../getting-started/deploying-polaris/quickstart-deploy-aws.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md index 84a2c5d296..56d5b8b6a5 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md @@ -32,7 +32,7 @@ The requirements to run the script below are: * "rds:CreateDBInstance" * "rds:DescribeDBInstances" * "rds:CreateDBSubnetGroup" - * "sts:AssumeRole" on the same role as the Instance Profile role of the EC2 instance on which you are running this script. + * "sts:AssumeRole" on the same role as the Instance Profile role of the EC2 instance on which you are running this script. Additionally, you should ensure that the Instance Profile contains a trust policy that allows the role to trust itself to be assumed. ```shell chmod +x getting-started/assets/cloud_providers/deploy-aws.sh From 02a70e83df9d4de1c106bddda24e69ef3df80c26 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Tue, 22 Apr 2025 01:14:15 -0700 Subject: [PATCH 12/21] Azure First Draft --- .../assets/cloud_providers/deploy-azure.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index 76ee85432b..a6eccea71b 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -31,6 +31,25 @@ POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host') FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" +STORAGE_ACCOUNT_NAME="polaristest$RANDOM_SUFFIX" +STORAGE_CONTAINER_NAME="polaris-test-container-$RANDOM_SUFFIX" + +az storage account create \ + --name "$STORAGE_ACCOUNT_NAME" \ + --resource-group "$CURRENT_RESOURCE_GROUP" \ + --location "$CURRENT_REGION" \ + --sku Standard_LRS \ + --kind StorageV2 \ + --enable-hierarchical-namespace false + +az storage container create \ + --account-name "$STORAGE_ACCOUNT_NAME" \ + --name "$STORAGE_CONTAINER_NAME" \ + --auth-mode login + +export AZURE_TENANT_ID=$(az account show --query tenantId -o tsv) +export STORAGE_LOCATION="abfss://$STORAGE_CONTAINER_NAME@$STORAGE_ACCOUNT_NAME.dfs.core.windows.net/quickstart_catalog" + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ From cb9da65f9c73b3356c4f747f11c31d0aba40bec7 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Tue, 22 Apr 2025 01:33:59 -0700 Subject: [PATCH 13/21] debug --- getting-started/assets/polaris/create-catalog.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index 34183a5c68..a4e8287690 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -55,7 +55,7 @@ STORAGE_CONFIG_INFO="{\"storageType\": \"$STORAGE_TYPE\", \"allowedLocations\": if [[ "$STORAGE_TYPE" == "S3" ]]; then STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $roleArn}') elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then - STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: [$tenantId]}') + STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: $tenantId}') fi echo @@ -79,7 +79,7 @@ curl -s -H "Authorization: Bearer ${token}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ http://polaris:8181/api/management/v1/catalogs \ - -d "$PAYLOAD" + -d "$PAYLOAD" -v echo echo Done. \ No newline at end of file From 95971d2b6099340eae4b3f6a84596ad805456825 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 15:18:52 -0700 Subject: [PATCH 14/21] Azure First Draft --- .../assets/cloud_providers/deploy-aws.sh | 2 ++ .../assets/cloud_providers/deploy-azure.sh | 21 +++++++++++++++++-- .../eclipselink/docker-compose.yml | 4 ++-- .../quickstart-deploy-aws.md | 1 + .../quickstart-deploy-azure.md | 1 + 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 58dae62f59..48f617cfc9 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -78,6 +78,8 @@ aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --crea export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" +export SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index a6eccea71b..a13a1fc0fc 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -17,8 +17,10 @@ # under the License. # -CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location') -CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName') +DESCRIBE_INSTANCE=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01") +CURRENT_RESOURCE_GROUP=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.resourceGroupName') +CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.location') +CURRENT_VM_NAME=$(echo $DESCRIBE_INSTANCE | jq -r '.compute.name') RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8) INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX" @@ -47,9 +49,24 @@ az storage container create \ --name "$STORAGE_CONTAINER_NAME" \ --auth-mode login +ASSIGNEE_PRINCIPAL_ID=$(az vm show --name $CURRENT_VM_NAME --resource-group $CURRENT_RESOURCE_GROUP --query identity.principalId -o tsv) +SCOPE=$(az storage account show --name $STORAGE_ACCOUNT_NAME --resource-group $CURRENT_RESOURCE_GROUP --query id -o tsv) +ROLE="Storage Blob Data Contributor" +az role assignment create \ + --assignee $ASSIGNEE_PRINCIPAL_ID \ + --role "$ROLE" \ + --scope "$SCOPE" + export AZURE_TENANT_ID=$(az account show --query tenantId -o tsv) export STORAGE_LOCATION="abfss://$STORAGE_CONTAINER_NAME@$STORAGE_ACCOUNT_NAME.dfs.core.windows.net/quickstart_catalog" +cat >> getting-started/eclipselink/trino-config/catalog/iceberg.properties << EOF +fs.native-azure.enabled=true +azure.auth-type=DEFAULT +EOF + +export SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index c6e4cbe8b8..d0632b3d81 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -59,7 +59,7 @@ services: entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' spark-sql: - image: apache/spark:3.5.5-java17-python3 + image: apache/spark:3.5.4-java17-python3 depends_on: polaris-setup: condition: service_completed_successfully @@ -73,7 +73,7 @@ services: retries: 15 command: [ /opt/spark/bin/spark-sql, - --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17", + --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0", --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", --conf, "spark.sql.catalog.polaris.type=rest", diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md index 56d5b8b6a5..b959a69f6d 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md @@ -27,6 +27,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta The requirements to run the script below are: * There must be at least two subnets created in the VPC and region in which your EC2 instance reside. The span of subnets MUST include at least 2 availability zones (AZs) within the same region. +* Your EC2 instance must be enabled with IMDSv1 or IMDSv2 with 2+ hop limit. * The AWS identity that you will use to run this script must have the following AWS permissions: * "ec2:DescribeInstances" * "rds:CreateDBInstance" diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md index 6acaac7d0e..f8b75de79b 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md @@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta The requirements to run the script below are: * Install the AZ CLI, if it is not already installed on the Azure VM. Instructions to download the AZ CLI can be found [here](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). * You must be logged into the AZ CLI. Please run `az account show` to ensure that you are logged in prior to running this script. +* Assign a System-Assigned Managed Identity to the Azure VM. ```shell chmod +x getting-started/assets/cloud_providers/deploy-azure.sh From 50587eab77edd796cf042e50f3d5f31b665f57ea Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 16:27:35 -0700 Subject: [PATCH 15/21] debug --- getting-started/assets/cloud_providers/deploy-aws.sh | 3 ++- getting-started/assets/cloud_providers/deploy-azure.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 48f617cfc9..cf107ff98c 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -78,7 +78,8 @@ aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --crea export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" -export SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" +SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" +sed "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index a13a1fc0fc..e8fd249346 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -65,7 +65,8 @@ fs.native-azure.enabled=true azure.auth-type=DEFAULT EOF -export SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" +SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" +sed "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ From 54662df3d37ef2cdaa8b2f5841e294f934d1bf12 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 16:34:36 -0700 Subject: [PATCH 16/21] typo --- getting-started/assets/cloud_providers/deploy-aws.sh | 2 +- getting-started/assets/cloud_providers/deploy-azure.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index cf107ff98c..b1789feb90 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -79,7 +79,7 @@ aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --crea export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" -sed "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml +sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index e8fd249346..58975748a6 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -66,7 +66,7 @@ azure.auth-type=DEFAULT EOF SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" -sed "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml +sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ From 73a3903bcf578410877fea32bd9e4540d371ab1d Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 17:20:53 -0700 Subject: [PATCH 17/21] GCP First Try --- getting-started/assets/cloud_providers/deploy-gcp.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh index 5da93b5585..2e1c8e655a 100644 --- a/getting-started/assets/cloud_providers/deploy-gcp.sh +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -41,6 +41,13 @@ POSTGRES_ADDR=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="get(ip FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g') sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" +GCS_BUCKET_NAME="polaris-test-gcs-$RANDOM_SUFFIX" +echo "GCS Bucket Name: $GCS_BUCKET_NAME" + +gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location=$CURRENT_REGION +export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/" + + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ From 5ed1c62fd2f0a0e721b4addc1c02af03ef71bea5 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 17:46:48 -0700 Subject: [PATCH 18/21] GCP Complete --- getting-started/assets/cloud_providers/deploy-gcp.sh | 3 +++ getting-started/eclipselink/docker-compose.yml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh index 2e1c8e655a..b70611ce76 100644 --- a/getting-started/assets/cloud_providers/deploy-gcp.sh +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -47,6 +47,9 @@ echo "GCS Bucket Name: $GCS_BUCKET_NAME" gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location=$CURRENT_REGION export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/" +SPARK_ADDITIONAL_JARS=",org.apache.iceberg:iceberg-gcp-bundle:1.7.0" +sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml + ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index d0632b3d81..64f0dafde2 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -59,7 +59,7 @@ services: entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"' spark-sql: - image: apache/spark:3.5.4-java17-python3 + image: apache/spark:3.5.5-java17-python3 depends_on: polaris-setup: condition: service_completed_successfully From de9677eb6a1da5b1db594f93a68c5ef1c8aec606 Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Wed, 23 Apr 2025 22:32:32 -0700 Subject: [PATCH 19/21] GCP Final --- .../getting-started/deploying-polaris/quickstart-deploy-gcp.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md index 7c6e9fbfc0..86ec4a89f0 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md @@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta The requirements to run the script below are: * Install the `gcloud` CLI, if it is not already installed on the GCP VM. Instructions to download the `gcloud` CLI can be found [here](https://cloud.google.com/sdk/docs/install). * Ensure the `Cloud SQL Admin API` has been enabled in your project and that your VM's Principal has access to the correct role: `roles/cloudsql.admin`. +* Ensure the VM's Principal has access to at least Read-only scope on Compute Engine: `compute.readonly`. ```shell chmod +x getting-started/assets/cloud_providers/deploy-gcp.sh From 255a006b67453799f75b84fc8bdb218e0cde3bdb Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 24 Apr 2025 13:37:35 -0700 Subject: [PATCH 20/21] add all jars to Spark --- getting-started/assets/cloud_providers/deploy-aws.sh | 6 +++--- getting-started/assets/cloud_providers/deploy-azure.sh | 4 ++-- getting-started/assets/cloud_providers/deploy-gcp.sh | 4 ++-- getting-started/eclipselink/docker-compose.yml | 2 +- .../deploying-polaris/quickstart-deploy-aws.md | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index b1789feb90..0f4c3114c2 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -71,15 +71,15 @@ POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g') sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml" -S3_BUCKET_NAME="polaris-test-s3-$RANDOM_SUFFIX" +S3_BUCKET_NAME="polaris-quickstart-s3-$RANDOM_SUFFIX" echo "S3 Bucket Name: $S3_BUCKET_NAME" aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --create-bucket-configuration LocationConstraint=$CURRENT_REGION export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" -SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" -sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml +#SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" +#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index 58975748a6..56621b157b 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -65,8 +65,8 @@ fs.native-azure.enabled=true azure.auth-type=DEFAULT EOF -SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" -sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml +#SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" +#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh index b70611ce76..6fb75e6502 100644 --- a/getting-started/assets/cloud_providers/deploy-gcp.sh +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -47,8 +47,8 @@ echo "GCS Bucket Name: $GCS_BUCKET_NAME" gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location=$CURRENT_REGION export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/" -SPARK_ADDITIONAL_JARS=",org.apache.iceberg:iceberg-gcp-bundle:1.7.0" -sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml +#SPARK_ADDITIONAL_JARS=",org.apache.iceberg:iceberg-gcp-bundle:1.7.0" +#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml index 64f0dafde2..985a6a9e54 100644 --- a/getting-started/eclipselink/docker-compose.yml +++ b/getting-started/eclipselink/docker-compose.yml @@ -73,7 +73,7 @@ services: retries: 15 command: [ /opt/spark/bin/spark-sql, - --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0", + --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17,org.apache.iceberg:iceberg-gcp-bundle:1.7.0,org.apache.iceberg:iceberg-azure-bundle:1.7.0", --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog", --conf, "spark.sql.catalog.polaris.type=rest", diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md index b959a69f6d..7425c6e344 100644 --- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md +++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md @@ -27,7 +27,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta The requirements to run the script below are: * There must be at least two subnets created in the VPC and region in which your EC2 instance reside. The span of subnets MUST include at least 2 availability zones (AZs) within the same region. -* Your EC2 instance must be enabled with IMDSv1 or IMDSv2 with 2+ hop limit. +* Your EC2 instance must be enabled with [IMDSv1 or IMDSv2 with 2+ hop limit](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-new-instances.html#configure-IMDS-new-instances-instance-settings). * The AWS identity that you will use to run this script must have the following AWS permissions: * "ec2:DescribeInstances" * "rds:CreateDBInstance" From 2a5fec01b136362d74610ff15b68be5707a8c58d Mon Sep 17 00:00:00 2001 From: adnanhemani Date: Thu, 24 Apr 2025 14:55:02 -0700 Subject: [PATCH 21/21] refactor --- getting-started/assets/cloud_providers/deploy-aws.sh | 3 --- getting-started/assets/cloud_providers/deploy-azure.sh | 3 --- getting-started/assets/cloud_providers/deploy-gcp.sh | 4 ---- 3 files changed, 10 deletions(-) diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh index 0f4c3114c2..bd4f31cadc 100644 --- a/getting-started/assets/cloud_providers/deploy-aws.sh +++ b/getting-started/assets/cloud_providers/deploy-aws.sh @@ -78,9 +78,6 @@ aws s3api create-bucket --bucket $S3_BUCKET_NAME --region $CURRENT_REGION --crea export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/" -#SPARK_ADDITIONAL_JARS=",software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17" -#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml - ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh index 56621b157b..c79b3eedd8 100644 --- a/getting-started/assets/cloud_providers/deploy-azure.sh +++ b/getting-started/assets/cloud_providers/deploy-azure.sh @@ -65,9 +65,6 @@ fs.native-azure.enabled=true azure.auth-type=DEFAULT EOF -#SPARK_ADDITIONAL_JARS=",org.apache.hadoop:hadoop-azure:3.3.4,org.apache.hadoop:hadoop-azure-datalake:3.3.4,org.apache.iceberg:iceberg-azure-bundle:1.6.1" -#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml - ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \ diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh index 6fb75e6502..83f26f8e50 100644 --- a/getting-started/assets/cloud_providers/deploy-gcp.sh +++ b/getting-started/assets/cloud_providers/deploy-gcp.sh @@ -47,10 +47,6 @@ echo "GCS Bucket Name: $GCS_BUCKET_NAME" gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location=$CURRENT_REGION export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/" -#SPARK_ADDITIONAL_JARS=",org.apache.iceberg:iceberg-gcp-bundle:1.7.0" -#sed -i "/^\s*--packages/s|\",$|${SPARK_ADDITIONAL_JARS}\",|" getting-started/eclipselink/docker-compose.yml - - ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \ -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \ -Dquarkus.container-image.tag=postgres-latest \