diff --git a/getting-started/assets/cloud_providers/deploy-aws.sh b/getting-started/assets/cloud_providers/deploy-aws.sh
index c943eb69b8..bd4f31cadc 100644
--- a/getting-started/assets/cloud_providers/deploy-aws.sh
+++ b/getting-started/assets/cloud_providers/deploy-aws.sh
@@ -21,12 +21,14 @@ EC2_INSTANCE_ID=$(cat /var/lib/cloud/data/instance-id)
 
 DESCRIBE_INSTANCE=$(aws ec2 describe-instances \
   --instance-ids $EC2_INSTANCE_ID \
-  --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone}' \
+  --query 'Reservations[*].Instances[*].{Instance:InstanceId,VPC:VpcId,AZ:Placement.AvailabilityZone,RoleArn:IamInstanceProfile.Arn}' \
   --output json)
 
 CURRENT_VPC=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."VPC")
-
 CURRENT_REGION=$(echo $DESCRIBE_INSTANCE | jq -r .[0].[0]."AZ" | sed 's/.$//')
 
+RAW_ROLE_ARN=$(echo "$DESCRIBE_INSTANCE" | jq -r '.[0][0].RoleArn')
+export AWS_ROLE_ARN="${RAW_ROLE_ARN/instance-profile/role}"
+
 ALL_SUBNETS=$(aws ec2 describe-subnets \
   --region $CURRENT_REGION \
@@ -34,7 +36,7 @@ ALL_SUBNETS=$(aws ec2 describe-subnets \
   --output json \
   | jq -r '[.[]["SubnetId"]] | join(" ")')
 
-RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'A-Za-z0-9' | head -c 8)
+RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8)
 SUBNET_GROUP_NAME="polaris-db-subnet-group-$RANDOM_SUFFIX"
 INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX"
 
@@ -69,6 +71,20 @@ POSTGRES_ADDR=$(echo $DESCRIBE_DB | jq -r '.["DBInstances"][0]["Endpoint"]' | jq
 FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR/{realm}" | sed 's/[&/\]/\\&/g')
 sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
 
+S3_BUCKET_NAME="polaris-quickstart-s3-$RANDOM_SUFFIX"
+echo "S3 Bucket Name: $S3_BUCKET_NAME"
+
+# us-east-1 is the default S3 region and rejects an explicit LocationConstraint,
+# so only pass the bucket configuration for other regions.
+if [ "$CURRENT_REGION" = "us-east-1" ]; then
+  aws s3api create-bucket --bucket "$S3_BUCKET_NAME" --region "$CURRENT_REGION"
+else
+  aws s3api create-bucket --bucket "$S3_BUCKET_NAME" --region "$CURRENT_REGION" \
+    --create-bucket-configuration "LocationConstraint=$CURRENT_REGION"
+fi
+
+export STORAGE_LOCATION="s3://$S3_BUCKET_NAME/quickstart_catalog/"
+
 ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
   -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \
   -Dquarkus.container-image.tag=postgres-latest \
diff --git a/getting-started/assets/cloud_providers/deploy-azure.sh b/getting-started/assets/cloud_providers/deploy-azure.sh
index 76ee85432b..c79b3eedd8 100644
--- a/getting-started/assets/cloud_providers/deploy-azure.sh
+++ b/getting-started/assets/cloud_providers/deploy-azure.sh
@@ -17,8 +17,10 @@
 # under the License.
 #
 
-CURRENT_REGION=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.location')
-CURRENT_RESOURCE_GROUP=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01" | jq -r '.compute.resourceGroupName')
+DESCRIBE_INSTANCE=$(curl -H Metadata:true "http://169.254.169.254/metadata/instance?api-version=2021-02-01")
+CURRENT_RESOURCE_GROUP=$(echo "$DESCRIBE_INSTANCE" | jq -r '.compute.resourceGroupName')
+CURRENT_REGION=$(echo "$DESCRIBE_INSTANCE" | jq -r '.compute.location')
+CURRENT_VM_NAME=$(echo "$DESCRIBE_INSTANCE" | jq -r '.compute.name')
 RANDOM_SUFFIX=$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8)
 INSTANCE_NAME="polaris-backend-test-$RANDOM_SUFFIX"
 
@@ -31,6 +33,39 @@ POSTGRES_ADDR=$(echo $CREATE_DB_RESPONSE | jq -r '.host')
 FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g')
 sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
 
+STORAGE_ACCOUNT_NAME="polaristest$RANDOM_SUFFIX"
+STORAGE_CONTAINER_NAME="polaris-test-container-$RANDOM_SUFFIX"
+
+az storage account create \
+  --name "$STORAGE_ACCOUNT_NAME" \
+  --resource-group "$CURRENT_RESOURCE_GROUP" \
+  --location "$CURRENT_REGION" \
+  --sku Standard_LRS \
+  --kind StorageV2 \
+  --enable-hierarchical-namespace false
+
+az storage container create \
+  --account-name "$STORAGE_ACCOUNT_NAME" \
+  --name "$STORAGE_CONTAINER_NAME" \
+  --auth-mode login
+
+# Grant this VM's identity the blob data role, scoped to the new storage account.
+ASSIGNEE_PRINCIPAL_ID=$(az vm show --name "$CURRENT_VM_NAME" --resource-group "$CURRENT_RESOURCE_GROUP" --query identity.principalId -o tsv)
+SCOPE=$(az storage account show --name "$STORAGE_ACCOUNT_NAME" --resource-group "$CURRENT_RESOURCE_GROUP" --query id -o tsv)
+ROLE="Storage Blob Data Contributor"
+az role assignment create \
+  --assignee "$ASSIGNEE_PRINCIPAL_ID" \
+  --role "$ROLE" \
+  --scope "$SCOPE"
+
+export AZURE_TENANT_ID=$(az account show --query tenantId -o tsv)
+export STORAGE_LOCATION="abfss://$STORAGE_CONTAINER_NAME@$STORAGE_ACCOUNT_NAME.dfs.core.windows.net/quickstart_catalog"
+
+cat >> getting-started/eclipselink/trino-config/catalog/iceberg.properties << EOF
+fs.native-azure.enabled=true
+azure.auth-type=DEFAULT
+EOF
+
 ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
   -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \
   -Dquarkus.container-image.tag=postgres-latest \
diff --git a/getting-started/assets/cloud_providers/deploy-gcp.sh b/getting-started/assets/cloud_providers/deploy-gcp.sh
index 5da93b5585..83f26f8e50 100644
--- a/getting-started/assets/cloud_providers/deploy-gcp.sh
+++ b/getting-started/assets/cloud_providers/deploy-gcp.sh
@@ -41,6 +41,12 @@ POSTGRES_ADDR=$(gcloud sql instances describe $DB_INSTANCE_NAME --format="get(ip
 FULL_POSTGRES_ADDR=$(printf '%s\n' "jdbc:postgresql://$POSTGRES_ADDR:5432/{realm}" | sed 's/[&/\]/\\&/g')
 sed -i "/jakarta.persistence.jdbc.url/ s|value=\"[^\"]*\"|value=\"$FULL_POSTGRES_ADDR\"|" "getting-started/assets/eclipselink/persistence.xml"
 
+GCS_BUCKET_NAME="polaris-test-gcs-$RANDOM_SUFFIX"
+echo "GCS Bucket Name: $GCS_BUCKET_NAME"
+
+gcloud storage buckets create "gs://$GCS_BUCKET_NAME" --location="$CURRENT_REGION"
+export STORAGE_LOCATION="gs://$GCS_BUCKET_NAME/quickstart_catalog/"
+
 ./gradlew clean :polaris-quarkus-server:assemble :polaris-quarkus-admin:assemble \
   -PeclipseLinkDeps=org.postgresql:postgresql:42.7.4 \
   -Dquarkus.container-image.tag=postgres-latest \
diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh
index b7b8e0f51d..a4e8287690 100755
--- a/getting-started/assets/polaris/create-catalog.sh
+++ b/getting-started/assets/polaris/create-catalog.sh
@@ -19,6 +19,8 @@
 
 set -e
 
+apk add --no-cache jq
+
 token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
   --user root:s3cr3t \
   -d grant_type=client_credentials \
@@ -32,29 +34,55 @@ fi
 echo
 echo "Obtained access token: ${token}"
 
+STORAGE_TYPE="FILE"
+if [ -z "${STORAGE_LOCATION}" ]; then
+  echo "STORAGE_LOCATION is not set, using FILE storage type"
+  STORAGE_LOCATION="file:///var/tmp/quickstart_catalog/"
+else
+  echo "STORAGE_LOCATION is set to '$STORAGE_LOCATION'"
+  # Derive the storage type from the location prefix; any other scheme
+  # falls back to AZURE. 'case' keeps this portable to plain sh.
+  case "$STORAGE_LOCATION" in
+    s3*) STORAGE_TYPE="S3" ;;
+    gs*) STORAGE_TYPE="GCS" ;;
+    file*) STORAGE_TYPE="FILE" ;;
+    *) STORAGE_TYPE="AZURE" ;;
+  esac
+  echo "Using StorageType: $STORAGE_TYPE"
+fi
+
+# Build the JSON with jq so that values are escaped correctly.
+STORAGE_CONFIG_INFO=$(jq -n --arg storageType "$STORAGE_TYPE" --arg location "$STORAGE_LOCATION" \
+  '{storageType: $storageType, allowedLocations: [$location]}')
+
+if [ "$STORAGE_TYPE" = "S3" ]; then
+  STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn "$AWS_ROLE_ARN" '. + {roleArn: $roleArn}')
+elif [ "$STORAGE_TYPE" = "AZURE" ]; then
+  STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId "$AZURE_TENANT_ID" '. + {tenantId: $tenantId}')
+fi
+
 echo
 echo Creating a catalog named quickstart_catalog...
 
+PAYLOAD=$(jq -n --arg location "$STORAGE_LOCATION" --argjson storageConfigInfo "$STORAGE_CONFIG_INFO" '{
+  catalog: {
+    name: "quickstart_catalog",
+    type: "INTERNAL",
+    readOnly: false,
+    properties: {
+      "default-base-location": $location
+    },
+    storageConfigInfo: $storageConfigInfo
+  }
+}')
+
+echo "$PAYLOAD"
+
 curl -s -H "Authorization: Bearer ${token}" \
    -H 'Accept: application/json' \
    -H 'Content-Type: application/json' \
    http://polaris:8181/api/management/v1/catalogs \
-   -d '{
-     "catalog": {
-       "name": "quickstart_catalog",
-       "type": "INTERNAL",
-       "readOnly": false,
-       "properties": {
-         "default-base-location": "file:///var/tmp/quickstart_catalog/"
-       },
-       "storageConfigInfo": {
-         "storageType": "FILE",
-         "allowedLocations": [
-           "file:///var/tmp"
-         ]
-       }
-     }
-   }'
+   -d "$PAYLOAD" -v
 
 echo
 echo Done.
\ No newline at end of file
diff --git a/getting-started/eclipselink/docker-compose.yml b/getting-started/eclipselink/docker-compose.yml
index 5ece9c6069..985a6a9e54 100644
--- a/getting-started/eclipselink/docker-compose.yml
+++ b/getting-started/eclipselink/docker-compose.yml
@@ -50,6 +50,10 @@ services:
     depends_on:
       polaris:
         condition: service_healthy
+    environment:
+      - STORAGE_LOCATION=${STORAGE_LOCATION:-}
+      - AWS_ROLE_ARN=${AWS_ROLE_ARN:-}
+      - AZURE_TENANT_ID=${AZURE_TENANT_ID:-}
     volumes:
       - ../assets/polaris/:/polaris
     entrypoint: '/bin/sh -c "chmod +x /polaris/create-catalog.sh && /polaris/create-catalog.sh"'
@@ -69,7 +73,7 @@ services:
       retries: 15
     command: [
       /opt/spark/bin/spark-sql,
-      --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17",
+      --packages, "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.0,software.amazon.awssdk:bundle:2.28.17,software.amazon.awssdk:url-connection-client:2.28.17,org.apache.iceberg:iceberg-gcp-bundle:1.7.0,org.apache.iceberg:iceberg-azure-bundle:1.7.0",
       --conf, "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
       --conf, "spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog",
       --conf, "spark.sql.catalog.polaris.type=rest",
diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md
index 266cea1c0d..7425c6e344 100644
--- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md
+++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-aws.md
@@ -27,11 +27,14 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
 
 The requirements to run the script below are:
 * There must be at least two subnets created in the VPC and region in which your EC2 instance reside. The span of subnets MUST include at least 2 availability zones (AZs) within the same region.
+* Your EC2 instance must have [IMDSv1 enabled, or IMDSv2 enabled with a hop limit of at least 2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-new-instances.html#configure-IMDS-new-instances-instance-settings).
 * The AWS identity that you will use to run this script must have the following AWS permissions:
   * "ec2:DescribeInstances"
   * "rds:CreateDBInstance"
   * "rds:DescribeDBInstances"
   * "rds:CreateDBSubnetGroup"
+  * "s3:CreateBucket"
+  * "sts:AssumeRole" on the role attached to the Instance Profile of the EC2 instance on which you are running this script. Additionally, ensure that this role's trust policy allows the role to assume itself.
 
 ```shell
 chmod +x getting-started/assets/cloud_providers/deploy-aws.sh
diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md
index 6acaac7d0e..f8b75de79b 100644
--- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md
+++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-azure.md
@@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
 The requirements to run the script below are:
 * Install the AZ CLI, if it is not already installed on the Azure VM. Instructions to download the AZ CLI can be found [here](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli).
 * You must be logged into the AZ CLI. Please run `az account show` to ensure that you are logged in prior to running this script.
+* Assign a System-Assigned Managed Identity to the Azure VM.
 
 ```shell
 chmod +x getting-started/assets/cloud_providers/deploy-azure.sh
diff --git a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md
index 7c6e9fbfc0..86ec4a89f0 100644
--- a/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md
+++ b/site/content/in-dev/unreleased/getting-started/deploying-polaris/quickstart-deploy-gcp.md
@@ -28,6 +28,7 @@ Additionally, Polaris will be bootstrapped to use this database and Docker conta
 The requirements to run the script below are:
 * Install the `gcloud` CLI, if it is not already installed on the GCP VM. Instructions to download the `gcloud` CLI can be found [here](https://cloud.google.com/sdk/docs/install).
 * Ensure the `Cloud SQL Admin API` has been enabled in your project and that your VM's Principal has access to the correct role: `roles/cloudsql.admin`.
+* Ensure the VM's Principal has access to at least Read-only scope on Compute Engine: `compute.readonly`.
 
 ```shell
 chmod +x getting-started/assets/cloud_providers/deploy-gcp.sh