From d8c20799e0480a862ab0cb1d4c1dc0119c67d442 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 21 Oct 2024 21:56:39 +0200 Subject: [PATCH 01/17] add s3 compatible storage - first commit --- .gitignore | 4 + build.gradle.kts | 3 + helm/polaris/values.yaml | 273 +++++------------- .../polaris/core/PolarisConfiguration.java | 1 + .../polaris/core/entity/CatalogEntity.java | 50 ++++ .../storage/PolarisCredentialProperty.java | 2 + .../PolarisStorageConfigurationInfo.java | 3 + .../s3/S3CredentialsStorageIntegration.java | 138 +++++++++ .../s3/S3StorageConfigurationInfo.java | 164 +++++++++++ regtests/minio/Readme.md | 42 +++ regtests/minio/certs/CAs/private.key | 5 + regtests/minio/certs/CAs/public.crt | 13 + regtests/minio/certs/private.key | 5 + regtests/minio/certs/public.crt | 13 + regtests/minio/docker-compose.yml | 69 +++++ regtests/minio/queries-for-spark.sql | 42 +++ regtests/run_spark_sql_s3compatible.sh | 220 ++++++++++++++ ...PolarisStorageIntegrationProviderImpl.java | 4 + spec/polaris-management-service.yml | 54 ++++ 19 files changed, 902 insertions(+), 203 deletions(-) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 regtests/minio/Readme.md create mode 100644 regtests/minio/certs/CAs/private.key create mode 100644 regtests/minio/certs/CAs/public.crt create mode 100644 regtests/minio/certs/private.key create mode 100644 regtests/minio/certs/public.crt create mode 100644 regtests/minio/docker-compose.yml create mode 100644 regtests/minio/queries-for-spark.sql create mode 100755 regtests/run_spark_sql_s3compatible.sh diff --git a/.gitignore b/.gitignore index e220135f64..0092dccc2f 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ regtests/output/ # This file, if checked in after running for example regtests, contains unmanaged dependencies that 
eventually # cause unnecessary "security alerts" like https://github.com/apache/polaris/pull/718. regtests/client/python/poetry.lock +regtests/minio/miniodata/* # Python stuff (see note about poetry.lock above as well!) /poetry.lock @@ -64,6 +65,9 @@ gradle/wrapper/gradle-wrapper-*.sha256 *.ipr *.iws +# VScode +.vscode + # Gradle /.gradle /build-logic/.gradle diff --git a/build.gradle.kts b/build.gradle.kts index 45f20b59e9..02c24a4db2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -109,6 +109,9 @@ tasks.named("rat").configure { excludes.add("regtests/metastore_db/**") excludes.add("regtests/client/python/.openapi-generator/**") excludes.add("regtests/output/**") + excludes.add("regtests/minio/miniodata/**") + excludes.add("regtests/minio/**/*.crt") + excludes.add("regtests/minio/**/*.key") excludes.add("**/*.ipynb") excludes.add("**/*.iml") diff --git a/helm/polaris/values.yaml b/helm/polaris/values.yaml index 7713d8ca2c..a6d7f3180f 100644 --- a/helm/polaris/values.yaml +++ b/helm/polaris/values.yaml @@ -359,209 +359,76 @@ extraEnv: # name: aws-secret # key: secret_access_key -# -- Extra volumes to add to the polaris pod. See https://kubernetes.io/docs/concepts/storage/volumes/. -extraVolumes: [] - # - name: extra-volume - # emptyDir: {} - -# -- Extra volume mounts to add to the polaris container. See https://kubernetes.io/docs/concepts/storage/volumes/. -extraVolumeMounts: [] - # - name: extra-volume - # mountPath: /usr/share/extra-volume - -# -- Add additional init containers to the polaris pod(s) See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/. -extraInitContainers: [] - # - name: your-image-name - # image: your-image - # imagePullPolicy: Always - # command: ['sh', '-c', 'echo "hello world"'] - -tracing: - # -- Specifies whether tracing for the polaris server should be enabled. - enabled: false - # -- The collector endpoint URL to connect to (required). - # The endpoint URL must have either the http:// or the https:// scheme. 
- # The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port (by default 4317). - # See https://quarkus.io/guides/opentelemetry for more information. - endpoint: "http://otlp-collector:4317" - # -- Which requests should be sampled. Valid values are: "all", "none", or a ratio between 0.0 and - # "1.0d" (inclusive). E.g. "0.5d" means that 50% of the requests will be sampled. - # Note: avoid entering numbers here, always prefer a string representation of the ratio. - sample: "1.0d" - # -- Resource attributes to identify the polaris service among other tracing sources. - # See https://opentelemetry.io/docs/reference/specification/resource/semantic_conventions/#service. - # If left empty, traces will be attached to a service named "Apache Polaris"; to change this, - # provide a service.name attribute here. - attributes: - {} - # service.name: my-polaris - -metrics: - # -- Specifies whether metrics for the polaris server should be enabled. - enabled: true - # -- Additional tags (dimensional labels) to add to the metrics. - tags: - {} - # service: polaris - # environment: production - -serviceMonitor: - # -- Specifies whether a ServiceMonitor for Prometheus operator should be created. - enabled: true - # -- The scrape interval; leave empty to let Prometheus decide. Must be a valid duration, e.g. 1d, 1h30m, 5m, 10s. - interval: "" - # -- Labels for the created ServiceMonitor so that Prometheus operator can properly pick it up. - labels: - {} - # release: prometheus - # -- Relabeling rules to apply to metrics. Ref https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config. - metricRelabelings: [] - # - source_labels: [ __meta_kubernetes_namespace ] - # separator: ; - # regex: (.*) - # target_label: namespace - # replacement: $1 - # action: replace - -# -- Logging configuration. -logging: - # -- The log level of the root category, which is used as the default log level for all categories. 
- level: INFO - # -- The header name to use for the request ID. - requestIdHeaderName: Polaris-Request-Id - # -- Configuration for the console appender. - console: - # -- Whether to enable the console appender. - enabled: true - # -- The log level of the console appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- Configuration for the file appender. - file: - # -- Whether to enable the file appender. - enabled: false - # -- The log level of the file appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- The local directory where log files are stored. The persistent volume claim will be mounted - # here. - logsDir: /deployments/logs - # -- The log file name. - fileName: polaris.log - # -- Log rotation configuration. - rotation: - # -- The maximum size of the log file before it is rotated. Should be expressed as a Kubernetes quantity. - maxFileSize: 100Mi - # -- The maximum number of backup files to keep. - maxBackupIndex: 5 - # -- An optional suffix to append to the rotated log files. If present, the rotated log files - # will be grouped in time buckets, and each bucket will contain at most maxBackupIndex files. - # The suffix must be in a date-time format that is understood by DateTimeFormatter. If the - # suffix ends with .gz or .zip, the rotated files will also be compressed using the - # corresponding algorithm. 
- fileSuffix: ~ # .yyyy-MM-dd.gz - # -- The log storage configuration. A persistent volume claim will be created using these - # settings. - storage: - # -- The storage class name of the persistent volume claim to create. - className: standard - # -- The size of the persistent volume claim to create. - size: 512Gi - # -- Labels to add to the persistent volume claim spec selector; a persistent volume with - # matching labels must exist. Leave empty if using dynamic provisioning. - selectorLabels: {} - # app.kubernetes.io/name: polaris - # app.kubernetes.io/instance: RELEASE-NAME - # -- Configuration for specific log categories. - categories: - org.apache.polaris: INFO - org.apache.iceberg.rest: INFO - # Useful to debug configuration issues: - # io.smallrye.config: DEBUG - # -- Configuration for MDC (Mapped Diagnostic Context). Values specified here will be added to the - # log context of all incoming requests and can be used in log patterns. - mdc: - # aid=polaris - # sid=polaris-service - {} - -# -- Realm context resolver configuration. -realmContext: - # -- The type of realm context resolver to use. Two built-in types are supported: default and test; - # test is not recommended for production as it does not perform any realm validation. - type: default - # -- List of valid realms, for use with the default realm context resolver. The first realm in - # the list is the default realm. Realms not in this list will be rejected. - realms: - - POLARIS - -# -- Polaris features configuration. -features: - # -- Features to enable or disable globally. If a feature is not present in the map, the default - # built-in value is used. - defaults: {} - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - # SUPPORTED_CATALOG_STORAGE_TYPES: - # - S3 - # - GCS - # - AZURE - # - FILE - # -- Features to enable or disable per realm. This field is a map of maps. The realm name is the key, and the value is a map of - # feature names to values. 
If a feature is not present in the map, the default value from the 'defaults' field is used. - realmOverrides: {} - # my-realm: - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: true - -# -- Polaris persistence configuration. -persistence: - # -- The type of persistence to use. Two built-in types are supported: in-memory and eclipse-link. - type: eclipse-link # in-memory - # -- The configuration for the eclipse-link persistence manager. - eclipseLink: - # -- The secret name to pull persistence.xml from. - secret: - # -- The name of the secret to pull persistence.xml from. - # If not provided, the default built-in persistence.xml will be used. This is probably not what you want. - name: ~ - # -- The key in the secret to pull persistence.xml from. - key: persistence.xml - # -- The persistence unit name to use. - persistenceUnit: polaris - -# -- Polaris FileIO configuration. -fileIo: - # -- The type of file IO to use. Two built-in types are supported: default and wasb. The wasb one translates WASB paths to ABFS ones. - type: default - -# -- Storage credentials for the server. If the following properties are unset, default -# credentials will be used, in which case the pod must have the necessary permissions to access the storage. -storage: - # -- The secret to pull storage credentials from. - secret: - # -- The name of the secret to pull storage credentials from. - name: ~ - # -- The key in the secret to pull the AWS access key ID from. Only required when using AWS. - awsAccessKeyId: ~ - # -- The key in the secret to pull the AWS secret access key from. Only required when using AWS. - awsSecretAccessKey: ~ - # -- The key in the secret to pull the GCP token from. Only required when using GCP. - gcpToken: ~ - # -- The key in the secret to pull the GCP token expiration time from. Only required when using GCP. Must be a valid ISO 8601 duration. The default is PT1H (1 hour). - gcpTokenLifespan: ~ - -# -- Polaris authentication configuration. 
-authentication: - # -- The type of authentication to use. Two built-in types are supported: default and test; - # test is not recommended for production. - authenticator: +# -- Configures whether to enable the bootstrap metastore manager job +bootstrapMetastoreManager: false + +# -- Extra environment variables to add to the bootstrap metastore manager job (see `extraEnv` for an example) +bootstrapExtraEnv: [] + +# -- The secret name to pull persistence.xml from (ensure the key name is 'persistence.xml') +persistenceConfigSecret: ~ + +# -- Configures for polaris-server.yml +polarisServerConfig: + server: + # Maximum number of threads. + maxThreads: 200 + + # Minimum number of thread to keep alive. + minThreads: 10 + applicationConnectors: + # HTTP-specific options. + - type: http + + # The port on which the HTTP server listens for service requests. + port: 8181 + + adminConnectors: + - type: http + port: 8182 + + # The hostname of the interface to which the HTTP server socket wil be found. If omitted, the + # socket will listen on all interfaces. + # bindHost: localhost + + # ssl: + # keyStore: ./example.keystore + # keyStorePassword: example + # + # keyStoreType: JKS # (optional, JKS is default) + + # HTTP request log settings + requestLog: + appenders: + # Settings for logging to stdout. + - type: console + + # # Settings for logging to a file. + # - type: file + + # # The file to which statements will be logged. + # currentLogFilename: ./logs/request.log + + # # When the log file rolls over, the file will be archived to requests-2012-03-15.log.gz, + # # requests.log will be truncated, and new statements written to it. + # archivedLogFilenamePattern: ./logs/requests-%d.log.gz + + # # The maximum number of log files to archive. 
+ # archivedFileCount: 14 + + # # Enable archiving if the request log entries go to the their own file + # archive: true + + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + + callContextResolver: type: default # -- The type of token service to use. Two built-in types are supported: default and test; # test is not recommended for production. diff --git a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java index ca1962e3c3..29b5424b8f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java @@ -216,6 +216,7 @@ public static Builder builder() { .defaultValue( List.of( StorageConfigInfo.StorageTypeEnum.S3.name(), + StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE.name(), StorageConfigInfo.StorageTypeEnum.AZURE.name(), StorageConfigInfo.StorageTypeEnum.GCS.name(), StorageConfigInfo.StorageTypeEnum.FILE.name())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f3bfd6edf0..f8a37dd6f7 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,12 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.S3StorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import 
org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; +import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -141,6 +143,32 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } + if (configInfo instanceof S3StorageConfigurationInfo) { + S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; + return S3StorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) + .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) + .setCredsVendingStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum + .valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsVendingStrategyEnum.class, + s3Config.getCredsVendingStrategy().name())) + .setCredsCatalogAndClientStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.class, + s3Config.getCredsCatalogAndClientStrategy().name())) + .setAllowedLocations(s3Config.getAllowedLocations()) + .setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId()) + .setS3CredentialsCatalogSecretAccessKey( + s3Config.getS3CredentialsCatalogSecretAccessKey()) + .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientSecretAccessKey()) + .setS3CredentialsClientSecretAccessKey(s3Config.getS3CredentialsClientAccessKeyId()) + .build(); + } if (configInfo instanceof AzureStorageConfigurationInfo) { 
AzureStorageConfigurationInfo azureConfig = (AzureStorageConfigurationInfo) configInfo; return AzureStorageConfigInfo.builder() @@ -250,6 +278,28 @@ public Builder setStorageConfigurationInfo( awsConfig.validateArn(awsConfigModel.getRoleArn()); config = awsConfig; break; + + case S3_COMPATIBLE: + S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel; + config = + new S3StorageConfigurationInfo( + PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, + S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf( + org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo + .CredsVendingStrategyEnum.class, + s3ConfigModel.getCredsVendingStrategy().name()), + S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf( + org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo + .CredsCatalogAndClientStrategyEnum.class, + s3ConfigModel.getCredsCatalogAndClientStrategy().name()), + s3ConfigModel.getS3Endpoint(), + s3ConfigModel.getS3CredentialsCatalogAccessKeyId(), + s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(), + s3ConfigModel.getS3CredentialsClientAccessKeyId(), + s3ConfigModel.getS3CredentialsClientSecretAccessKey(), + s3ConfigModel.getS3PathStyleAccess(), + new ArrayList<>(allowedLocations)); + break; case AZURE: AzureStorageConfigInfo azureConfigModel = (AzureStorageConfigInfo) storageConfigModel; AzureStorageConfigurationInfo azureConfigInfo = diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java index c79aaf595d..13838e6af9 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java @@ -23,6 +23,8 @@ public enum PolarisCredentialProperty { AWS_KEY_ID(String.class, "s3.access-key-id", "the aws access key id"), AWS_SECRET_KEY(String.class, 
"s3.secret-access-key", "the aws access key secret"), AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"), + AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"), + AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"), CLIENT_REGION( String.class, "client.region", "region to configure client for making requests to AWS"), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java index 6b0638e837..4f290e77ba 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java @@ -47,6 +47,7 @@ import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; +import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,6 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3StorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), @@ -241,6 +243,7 @@ public void validateMaxAllowedLocations(int maxAllowedLocations) { /** Polaris' storage type, each has a fixed prefix for its location */ public enum StorageType { S3("s3://"), + S3_COMPATIBLE("s3://"), AZURE(List.of("abfs://", "wasb://", "abfss://", "wasbs://")), GCS("gs://"), FILE("file://"), diff --git 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java new file mode 100644 index 0000000000..5fdbbdf37d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.storage.s3; + +import java.net.URI; +import java.util.EnumMap; +import java.util.Set; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** Credential vendor that supports generating */ +public class S3CredentialsStorageIntegration + extends InMemoryStorageIntegration { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); + + private StsClient stsClient; + + // Constructor + public S3CredentialsStorageIntegration() { + super(S3CredentialsStorageIntegration.class.getName()); + } + + public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { + + LOGGER.debug("S3Compatible - createStsClient()"); + + LOGGER.info( + "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); + + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + + stsBuilder.region( + Region + .US_WEST_1); // default region to avoid bug, because most (all?) 
S3 compatible softwares + // do not care about regions + stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.create( + s3storageConfig.getS3CredentialsCatalogAccessKeyId(), + s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); + + this.stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + } + + /** {@inheritDoc} */ + @Override + public EnumMap getSubscopedCreds( + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3StorageConfigurationInfo storageConfig, + boolean allowListOperation, + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { + + LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); + + EnumMap propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + + switch (storageConfig.getCredsVendingStrategy()) { + case KEYS_SAME_AS_CATALOG: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsCatalogAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsCatalogSecretAccessKey()); + break; + + case KEYS_DEDICATED_TO_CLIENT: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsClientAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsClientSecretAccessKey()); + break; + + case TOKEN_WITH_ASSUME_ROLE: + if (this.stsClient == null) { + createStsClient(storageConfig); + } + LOGGER.debug("S3Compatible - assumeRole !"); + AssumeRoleResponse response = + stsClient.assumeRole( + 
AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); + + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put( + PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + break; + + // @TODO implement the MinIO external OpenID Connect - + // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 + // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: + // break; + } + + return propertiesMap; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java new file mode 100644 index 0000000000..c66deeff7d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.storage.s3; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ +public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private @NotNull CredsVendingStrategyEnum credsVendingStrategy; + private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; + private @NotNull String s3endpoint; + private @NotNull Boolean s3pathStyleAccess; + private @NotNull String s3CredentialsCatalogAccessKeyId; + private @NotNull String s3CredentialsCatalogSecretAccessKey; + private @Nullable String s3CredentialsClientAccessKeyId; + private @Nullable String s3CredentialsClientSecretAccessKey; + + // Define how and what the catalog client will receive as credentials + public static enum CredsVendingStrategyEnum { + KEYS_SAME_AS_CATALOG, + KEYS_DEDICATED_TO_CLIENT, + TOKEN_WITH_ASSUME_ROLE; + }; + + // Define how the access and secret keys will be receive during the catalo creation, if + // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, + // but less dynamic + public static enum CredsCatalogAndClientStrategyEnum { + VALUE, + ENV_VAR_NAME; + }; + + // Constructor + @JsonCreator + public S3StorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType 
storageType, + @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull + CredsVendingStrategyEnum credsVendingStrategy, + @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull + CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable + String s3CredentialsClientAccessKeyId, + @JsonProperty(value = "s3CredentialsClientSecretAccessKey", required = false) @Nullable + String s3CredentialsClientSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + Boolean s3PathStyleAccess, + @JsonProperty(value = "allowedLocations", required = true) @NotNull + List allowedLocations) { + + // Classic super and constructor stuff storing data in private internal properties + super(storageType, allowedLocations); + validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); + this.credsVendingStrategy = + CredsVendingStrategyEnum.valueOf( + CredsVendingStrategyEnum.class, credsVendingStrategy.name()); + this.credsCatalogAndClientStrategy = + CredsCatalogAndClientStrategyEnum.valueOf( + CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name()); + this.s3pathStyleAccess = s3PathStyleAccess; + this.s3endpoint = s3Endpoint; + + // The constructor is called multiple time during catalog life + // to do substitution only once, there is a basic if null test, otherwise affect the data from + // the "Polaris cache storage" + // this way the first time the value is retrived from the name of the variable + // next time the getenv will try to retrive a variable but is using the 
value as a nome, it will + // be null, we affect the value provided by "Polaris cache storage" + if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) { + String cai = System.getenv(s3CredentialsCatalogAccessKeyId); + String cas = System.getenv(s3CredentialsCatalogSecretAccessKey); + String cli = System.getenv(s3CredentialsClientAccessKeyId); + String cls = System.getenv(s3CredentialsClientSecretAccessKey); + this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = + (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey; + this.s3CredentialsClientAccessKeyId = (cli != null) ? cli : s3CredentialsClientAccessKeyId; + this.s3CredentialsClientSecretAccessKey = + (cls != null) ? cls : s3CredentialsClientSecretAccessKey; + } else { + this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; + this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; + this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; + } + } + + public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { + return this.credsVendingStrategy; + } + + public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { + return this.credsCatalogAndClientStrategy; + } + + public @NotNull String getS3Endpoint() { + return this.s3endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3pathStyleAccess; + } + + public @NotNull String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @NotNull String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3CredentialsClientAccessKeyId() { + return this.s3CredentialsClientAccessKeyId; + } + + public @Nullable String 
getS3CredentialsClientSecretAccessKey() { + return this.s3CredentialsClientSecretAccessKey; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType()) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md new file mode 100644 index 0000000000..08089f56f4 --- /dev/null +++ b/regtests/minio/Readme.md @@ -0,0 +1,42 @@ + + +# MiniIO Secured +## Minio and secured buckets with TLS self-signed / custom AC + +To be able to test Polaris with buckets in TLS under custom AC or self-signed certificate + +## MiniIO generate self-signed certificates designed for docker-compose setup + +- Download minio certificate generator : https://github.com/minio/certgen +- ```./certgen -host "localhost,minio,*"``` +- put them in ./certs and ./certs/CAs +- they will be mounted in default minio container placeholder + +## Test minIO secured TLS buckets from self-signed certificate with AWS CLI +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` + +## add to java cacerts only the public.crt as an AC +- ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` + +## remove from java cacerts the public.crt +- ```sudo keytool -delete -trustcacerts -cacerts -storepass changeit -noprompt -alias minio``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/certs/CAs/private.key b/regtests/minio/certs/CAs/private.key new file mode 100644 index 0000000000..e2e7ffca0c --- /dev/null +++ 
b/regtests/minio/certs/CAs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/CAs/public.crt b/regtests/minio/certs/CAs/public.crt new file mode 100644 index 0000000000..b06cc51e5d --- /dev/null +++ b/regtests/minio/certs/CAs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww +GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/certs/private.key b/regtests/minio/certs/private.key new file mode 100644 index 0000000000..e2e7ffca0c --- /dev/null +++ b/regtests/minio/certs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/public.crt b/regtests/minio/certs/public.crt new file mode 100644 index 0000000000..b06cc51e5d --- /dev/null +++ b/regtests/minio/certs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww 
+GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml new file mode 100644 index 0000000000..b61ca65370 --- /dev/null +++ b/regtests/minio/docker-compose.yml @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +services: + polaris-minio: + image: minio/minio:latest + container_name: minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + networks: + minio_net: + aliases: + - warehouse.minio + ports: + - 9001:9001 + - 9000:9000 + volumes: + - ./miniodata:/data + - ./certs:/root/.minio/certs/ + command: ["server", "/data", "--console-address", ":9001"] + minio-configured: + depends_on: + - polaris-minio + image: minio/mc:latest + container_name: mc + networks: + minio_net: + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + volumes: + - ./certs:/root/.mc/certs + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc rm -r --force --quiet minio/warehouse; + /usr/bin/mc mb --ignore-existing minio/warehouse; + /usr/bin/mc policy set readwrite minio/warehouse; + /usr/bin/mc rm -r --force --quiet minio/warehouse2; + /usr/bin/mc mb --ignore-existing minio/warehouse2; + /usr/bin/mc policy set readwrite minio/warehouse2; + /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; + /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; + tail -f /dev/null + " +networks: + minio_net: + diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql new file mode 100644 index 0000000000..966ea6db62 --- /dev/null +++ b/regtests/minio/queries-for-spark.sql @@ -0,0 +1,42 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at + +-- http://www.apache.org/licenses/LICENSE-2.0 + +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE DATABASE IF NOT EXISTS db1; +CREATE DATABASE IF NOT EXISTS db1.ns1; +CREATE DATABASE IF NOT EXISTS db1.ns2; +CREATE OR REPLACE TABLE db1.ns1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (10, 20); +INSERT INTO db1.ns1.table1 VALUES (11, 21); +INSERT INTO db1.ns1.table1 VALUES (12, 22); +SELECT * FROM db1.ns1.table1; + +CREATE OR REPLACE VIEW db1.ns2.view1 ( line_count COMMENT 'Count of lines') AS SELECT COUNT(1) as qty FROM db1.ns1.table1; +SELECT * FROM db1.ns2.view1; +INSERT INTO db1.ns1.table1 VALUES (13, 23); +SELECT * FROM db1.ns2.view1; + +CREATE DATABASE IF NOT EXISTS db1; +CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (3, 2); + +-- Test the second bucket allowed in the catalog +CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; +CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); +INSERT INTO db2.table1 VALUES (01, 02); +SELECT * FROM db2.table1; + +quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh new file mode 100755 index 0000000000..fc16fa5422 --- /dev/null +++ b/regtests/run_spark_sql_s3compatible.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# ----------------------------------------------------------------------------- +# Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. +# ----------------------------------------------------------------------------- +# +# Prequisite: +# This script use a MinIO with TLS. +# Please follow instructions in regtests/minio/Readme.md and update your +# java cacerts with self-signed certificate +# +# Usage: +# ./run_spark_sql_s3compatible.sh [S3-location] +# +# Description: +# - Without arguments: Runs against default minio bucket s3://warehouse/polaris +# - With one arguments: Runs against a catalog backed by minio S3. +# - [S3-location] - The S3 path to use as the default base location for the catalog. 
+# +# Examples: +# - Run against AWS S3_COMPATIBLE: +# ./run_spark_sql_s3compatible.sh s3://warehouse/polaris + + +clear +if [ $# -ne 0 ] && [ $# -ne 1 ]; then + echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" + echo "Usage: ./run_spark_sql.sh [S3-location]" + exit 1 +fi + +# Init +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}" +REGTEST_HOME=$(dirname $(realpath $0)) +cd ${REGTEST_HOME} + + +if [ $# -eq 0 ]; then + echo "creating a catalog backed by S3, default bucket is s3://warehouse/polaris" + S3_LOCATION="s3://warehouse/polaris" +fi + +if [ $# -eq 1 ]; then + echo "creating a catalog backed by S3 from first arg of this script respecting pattern 's3://mybucket/path'" + S3_LOCATION=$1 +fi +# Second location for testing catalog update +S3_LOCATION_2="s3://warehouse2/polaris/" + + + +# check if Polaris is running +polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs --output /dev/null) +if [ $polaris_http_code -eq 000 ] && [ $polaris_http_code -ne 200 ]; then + echo "Polaris is not running on ${POLARIS_HOST:-localhost}:8181. End of script" + exit 1 +fi + +# check if cacerts contain MinIO certificate +cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry) +echo $cert_response +if [ -z "$cert_response" ]; then + echo "There is no MinIO certificate in your cacerts, please read regtests/minio/Readme.md" + echo "End of script :-(" + exit 1 +fi + +# start minio with buckets and users +echo -e "\n\n-------\n\n" +echo "Start a minio with secured self-signed buckets s3://warehouse and users, wait a moment please..." 
+docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured + +echo "minio brower is availaible during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" +echo -e "\n\n-------\n\n" + +# spark setup +export SPARK_VERSION=spark-3.5.2 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 + +echo "Doing spark setup... wait a moment" +./setup.sh > /dev/null 2>&1 + +if [ -z "${SPARK_HOME}"]; then + export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi + + + + +# start of tests + +# creation of catalog + + +# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environnement variables have to be available to Polaris +# CATALOG_ID=minio-user-catalog +# CATALOG_SECRET=12345678-minio-catalog +# CLIENT_ID=minio-user-client +# CLIENT_SECRET=12345678-minio-client + +echo -e "\n----\nCREATE Catalog\n" +response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d "{ + \"name\": \"manual_spark\", + \"id\": 100, + \"type\": \"INTERNAL\", + \"readOnly\": false, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" +) 
+echo -e "Catalog creation - response API http code : $response_catalog \n" +if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then + echo "Problem during catalog creation" + exit 1 +fi + + + + +echo -e "Get the catalog created : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + +# Try to update the catalog, - adding a second bucket in the alllowed locations +echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the alllowed locations\n" +curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark \ + -d "{ + \"currentEntityVersion\":1, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" + + +echo -e "Get the catalog updated with second allowed location : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + + +echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it 
can only manage access and metadata\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + + +echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ + -d '{"name": "catalog_admin"}' > /dev/stderr + + +echo -e "\n----\nStart Spark-sql to test Polaris catalog with queries\n" +${SPARK_HOME}/bin/spark-sql --verbose \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ + --conf spark.sql.catalog.polaris.warehouse=manual_spark \ + --conf spark.sql.defaultCatalog=polaris \ + --conf spark.hadoop.hive.cli.print.header=true \ + -f "minio/queries-for-spark.sql" + + +echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" +echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" +echo ":-)" +echo "" +docker-compose --progress quiet --project-name minio --project-directory minio/ -f minio/docker-compose.yml down + diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index f61c67620f..80b0729d24 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ 
b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,6 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -73,6 +74,9 @@ public PolarisStorageIntegrationProviderImpl( (PolarisStorageIntegration) new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; + case S3_COMPATIBLE: + storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + break; case GCS: storageIntegration = (PolarisStorageIntegration) diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 54c3b96759..d4a1f44fb8 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -862,6 +862,7 @@ components: type: string enum: - S3 + - S3_COMPATIBLE - GCS - AZURE - FILE @@ -877,6 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -905,6 +907,58 @@ components: required: - roleArn + S3StorageConfigInfo: + type: object + description: S3 compatible storage configuration info (MinIO, Dell ECS, Netapp StorageGRID, ...) + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + properties: + credsCatalogAndClientStrategy: + type: string + enum: + - VALUE + - ENV_VAR_NAME + default: ENV_VAR_NAME + example: "ACCESS_KEY" + description: When you send key VALUE directly via this command, they should apear in logs. 
By ENV_VAR_NAME without dollar, only a reference will appear in logs, but the value have to be available as environnement variable in the context where Polaris is running + credsVendingStrategy: + type: string + enum: + - TOKEN_WITH_ASSUME_ROLE + - KEYS_SAME_AS_CATALOG + - KEYS_DEDICATED_TO_CLIENT + default: TOKEN_WITH_ASSUME_ROLE + description: The catalog strategy to vend credentials to client. Options possible are same keys than catalog, keys dedicated to clients, or Tokens with STS methods 'assumeRole' for Dell ECS or NetApp StorageGrid solution, 'truc' for MinIo solution) + s3.path-style-access: + type: boolean + description: if true use path style + default: false + s3.endpoint: + type: string + description: the S3 endpoint + example: "http[s]://host:port" + s3.credentials.catalog.access-key-id: + type: string + description: The ACCESS_KEY_ID used y the catalog to communicate with S3 + example: "$AWS_ACCESS_KEY_ID" + s3.credentials.catalog.secret-access-key: + type: string + description: The SECRET_ACCESS_KEY used y the catalog to communicate with S3 + example: "$AWS_SECRET_ACCESS_KEY" + s3.credentials.client.access-key-id: + type: string + description: Optional - ACCESS_KEY_ID vended by catalog to the client in case of this CredentialVendedStrategy is selected + example: "$AWS_ACCESS_KEY_ID" + s3.credentials.client.secret-access-key: + type: string + description: Optional - SECRET_ACCESS_KEY vended by catalog to the client in case of this CredentialVendedStrategy is selected + example: "$AWS_SECRET_ACCESS_KEY" + required: + - credsVendingStrategy + - s3.endpoint + - s3.credentials.catalog.access-key-id + - s3.credentials.catalog.secret-access-key + AzureStorageConfigInfo: type: object description: azure storage configuration info From 1ec4f07bd11a0f0a9dbacbaa2e24419677749022 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 21 Oct 2024 22:30:38 +0200 Subject: [PATCH 02/17] Create Readme.md for s3-compatible Better descriptions typo & comments Refacoring with 
skipCredentialSubscopingIndirection -> finaly removed Rebase with AWS updates from main branch adding roleArn, camelCase refactoring, typo, cleaning Add default AWS credentials provider for STS Error Co-authored-by: Gerrit-K Rebase from quarkus and keep only sts with some suggestions from code review helm unit test --- helm/polaris/tests/configmap_test.yaml | 286 ++++++++---------- .../polaris/core/entity/CatalogEntity.java | 52 ++-- .../storage/PolarisCredentialProperty.java | 3 +- .../PolarisStorageConfigurationInfo.java | 4 +- .../s3/S3CredentialsStorageIntegration.java | 138 --------- .../s3/S3StorageConfigurationInfo.java | 164 ---------- ...mpatibleCredentialsStorageIntegration.java | 220 ++++++++++++++ .../S3CompatibleStorageConfigurationInfo.java | 113 +++++++ .../main/resources/application-it.properties | 2 +- .../src/main/resources/application.properties | 2 +- regtests/minio/Readme.md | 11 +- regtests/minio/docker-compose.yml | 4 - regtests/minio/miniodata/Readme.md | 1 + regtests/run_spark_sql_s3compatible.sh | 70 +++-- ...PolarisStorageIntegrationProviderImpl.java | 6 +- spec/polaris-management-service.yml | 57 ++-- 16 files changed, 553 insertions(+), 580 deletions(-) delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java create mode 100644 regtests/minio/miniodata/Readme.md diff --git a/helm/polaris/tests/configmap_test.yaml b/helm/polaris/tests/configmap_test.yaml index ef725ec4f3..e070bf0dcf 100644 --- a/helm/polaris/tests/configmap_test.yaml +++ b/helm/polaris/tests/configmap_test.yaml @@ -183,159 +183,141 @@ 
tests: set: logging: { file: { enabled: true, json: true }, console: { enabled: true, json: true } } asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.json=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.json=true" } - - - it: should include logging categories - set: - logging: - categories: - # compact style - org.acme: DEBUG - # expanded style - org: - acme: - service: INFO - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme\".level=DEBUG" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme.service\".level=INFO" } - - - it: should include MDC context - set: - logging: - mdc: - # compact style - org.acme: foo - # expanded style - org: - acme: - service: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme\"=foo" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme.service\"=foo" } - - - it: should include telemetry configuration - set: - tracing: { enabled: true, endpoint: http://custom:4317, attributes: { service.name: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.exporter.otlp.endpoint=http://custom:4317" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=service.name=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=foo=bar" } - - - it: should include set sample rate numeric - set: - tracing: { enabled: true, sample: "0.123" } - asserts: - - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_traceidratio" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler.arg=0.123" } - - - it: should include set sample rate "all" - set: - tracing: { enabled: true, sample: "all" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_always_on" } - - - it: should include set sample rate "none" - set: - tracing: { enabled: true, sample: "none" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=always_off" } - - - it: should disable tracing by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should disable tracing - set: - tracing: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should include custom metrics - set: - metrics: { enabled: true, tags: { app: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.app=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.foo=bar" } - - - it: should disable metrics - set: - metrics: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.micrometer.enabled=false" } - - - it: should include advanced configuration - set: - advancedConfig: - # compact style - quarkus.compact.custom: true - # expanded style - quarkus: - expanded: - custom: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.compact.custom=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.expanded.custom=foo" } - - - it: should not include CORS configuration by default - asserts: - - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.http.cors" } - not: true - - - it: should include CORS configuration if defined - set: - cors: { allowedOrigins: [ "http://localhost:3000", "https://localhost:4000" ], allowedMethods: [ "GET", "POST" ], allowedHeaders: [ "X-Custom1", "X-Custom2" ], exposedHeaders: [ "X-Exposed-Custom1", "X-Exposed-Custom2" ], accessControlMaxAge: "PT1H", accessControlAllowCredentials: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.origins=http://localhost:3000,https://localhost:4000" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.methods=GET,POST" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.headers=X-Custom1,X-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.exposed-headers=X-Exposed-Custom1,X-Exposed-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-max-age=PT1H" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-allow-credentials=false" } - - - it: should configure rate-limiter with default values - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter no-op - set: - rateLimiter.type: no-op - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter with default token bucket values - set: - rateLimiter.type: default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: 
"polaris.rate-limiter.token-bucket.requests-per-second=9999" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT10S" } - - - it: should configure rate-limiter with custom token bucket values - set: - rateLimiter: - type: custom - tokenBucket: - type: custom - requestsPerSecond: 1234 - window: PT5S - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=1234" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT5S" } - - - it: should not include tasks configuration by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks" } - not: true - - - it: should include tasks configuration if defined + - equal: + path: data + value: + polaris-server.yml: |- + authenticator: + class: org.apache.polaris.service.auth.TestInlineBearerTokenPolarisAuthenticator + callContextResolver: + type: default + cors: + allowed-credentials: true + allowed-headers: + - '*' + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + exposed-headers: + - '*' + preflight-max-age: 600 + defaultRealms: + - default-realm + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + io: + factoryType: default + logging: + appenders: + - logFormat: '%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] + %c{30}: %m %kvp%n%ex' + threshold: ALL + type: console + level: INFO + loggers: + 
org.apache.iceberg.rest: DEBUG + org.apache.polaris: DEBUG + maxRequestBodyBytes: -1 + metaStoreManager: + type: in-memory + oauth2: + type: test + rateLimiter: + type: no-op + realmContextResolver: + type: default + server: + adminConnectors: + - port: 8182 + type: http + applicationConnectors: + - port: 8181 + type: http + maxThreads: 200 + minThreads: 10 + requestLog: + appenders: + - type: console + - it: should set config map data (auto sorted) set: - tasks: { maxConcurrentTasks: 10, maxQueuedTasks: 20 } + polarisServerConfig: + server: + maxThreads: 200 + minThreads: 10 + applicationConnectors: + - type: http + port: 8181 + adminConnectors: + - type: http + port: 8182 + requestLog: + appenders: + - type: console + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + callContextResolver: + type: default + realmContextResolver: + type: default + defaultRealms: + - default-realm + metaStoreManager: + type: eclipse-link + persistence-unit: polaris + conf-file: /eclipselink-config/conf.jar!/persistence.xml + io: + factoryType: default + oauth2: + type: default + tokenBroker: + type: symmetric-key + secret: polaris + authenticator: + class: org.apache.polaris.service.auth.DefaultPolarisAuthenticator + cors: + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-headers: + - "*" + exposed-headers: + - "*" + preflight-max-age: 600 + allowed-credentials: true + logging: + level: INFO + loggers: + org.apache.iceberg.rest: INFO + org.apache.polaris: INFO + appenders: + - type: console + threshold: ALL + logFormat: "%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] %c{30}: %m %kvp%n%ex" + maxRequestBodyBytes: -1 + rateLimiter: + type: no-op asserts: - matchRegex: { path: 'data["application.properties"]', pattern: 
"polaris.tasks.max-concurrent-tasks=10" } - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-queued-tasks=20" } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f8a37dd6f7..ab70b9b497 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,14 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; -import org.apache.polaris.core.admin.model.S3StorageConfigInfo; +import org.apache.polaris.core.admin.model.S3CompatibleStorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -143,30 +143,19 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } - if (configInfo instanceof S3StorageConfigurationInfo) { - S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; - return S3StorageConfigInfo.builder() + if (configInfo instanceof S3CompatibleStorageConfigurationInfo) { + S3CompatibleStorageConfigurationInfo s3Config = + 
(S3CompatibleStorageConfigurationInfo) configInfo; + return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) - .setCredsVendingStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum - .valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsVendingStrategyEnum.class, - s3Config.getCredsVendingStrategy().name())) - .setCredsCatalogAndClientStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.class, - s3Config.getCredsCatalogAndClientStrategy().name())) .setAllowedLocations(s3Config.getAllowedLocations()) - .setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId()) - .setS3CredentialsCatalogSecretAccessKey( + .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) + .setS3CredentialsCatalogSecretAccessKeyEnvVar( s3Config.getS3CredentialsCatalogSecretAccessKey()) - .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientSecretAccessKey()) - .setS3CredentialsClientSecretAccessKey(s3Config.getS3CredentialsClientAccessKeyId()) + .setS3Region(s3Config.getS3Region()) + .setS3RoleArn(s3Config.getS3RoleArn()) .build(); } if (configInfo instanceof AzureStorageConfigurationInfo) { @@ -280,24 +269,17 @@ public Builder setStorageConfigurationInfo( break; case S3_COMPATIBLE: - S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel; + S3CompatibleStorageConfigInfo s3ConfigModel = + (S3CompatibleStorageConfigInfo) storageConfigModel; config = - new S3StorageConfigurationInfo( + new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, - S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf( - 
org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsVendingStrategyEnum.class, - s3ConfigModel.getCredsVendingStrategy().name()), - S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsCatalogAndClientStrategyEnum.class, - s3ConfigModel.getCredsCatalogAndClientStrategy().name()), s3ConfigModel.getS3Endpoint(), - s3ConfigModel.getS3CredentialsCatalogAccessKeyId(), - s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(), - s3ConfigModel.getS3CredentialsClientAccessKeyId(), - s3ConfigModel.getS3CredentialsClientSecretAccessKey(), + s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), + s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), + s3ConfigModel.getS3Region(), + s3ConfigModel.getS3RoleArn(), new ArrayList<>(allowedLocations)); break; case AZURE: diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java index 13838e6af9..b7f1a9808c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java @@ -24,7 +24,8 @@ public enum PolarisCredentialProperty { AWS_SECRET_KEY(String.class, "s3.secret-access-key", "the aws access key secret"), AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"), AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"), - AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"), + AWS_PATH_STYLE_ACCESS( + Boolean.class, "s3.path-style-access", "whether or not to use path-style access"), CLIENT_REGION( String.class, "client.region", "region to configure client for making requests to AWS"), diff --git 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java index 4f290e77ba..c6eac4f7e4 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java @@ -47,7 +47,7 @@ import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), - @JsonSubTypes.Type(value = S3StorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3CompatibleStorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java deleted file mode 100644 index 5fdbbdf37d..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import java.net.URI; -import java.util.EnumMap; -import java.util.Set; -import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.storage.InMemoryStorageIntegration; -import org.apache.polaris.core.storage.PolarisCredentialProperty; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.sts.StsClient; -import software.amazon.awssdk.services.sts.StsClientBuilder; -import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; -import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; - -/** Credential vendor that supports generating */ -public class S3CredentialsStorageIntegration - extends InMemoryStorageIntegration { - - private static final Logger LOGGER = - LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); - - private StsClient stsClient; - - // Constructor - public S3CredentialsStorageIntegration() { - super(S3CredentialsStorageIntegration.class.getName()); - } - - public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { - - LOGGER.debug("S3Compatible - createStsClient()"); - - 
LOGGER.info( - "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); - - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - - stsBuilder.region( - Region - .US_WEST_1); // default region to avoid bug, because most (all?) S3 compatible softwares - // do not care about regions - stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.create( - s3storageConfig.getS3CredentialsCatalogAccessKeyId(), - s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); - - this.stsClient = stsBuilder.build(); - LOGGER.debug("S3Compatible - stsClient successfully built"); - } - - /** {@inheritDoc} */ - @Override - public EnumMap getSubscopedCreds( - @NotNull PolarisDiagnostics diagnostics, - @NotNull S3StorageConfigurationInfo storageConfig, - boolean allowListOperation, - @NotNull Set allowedReadLocations, - @NotNull Set allowedWriteLocations) { - - LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); - - EnumMap propertiesMap = - new EnumMap<>(PolarisCredentialProperty.class); - propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); - propertiesMap.put( - PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, - storageConfig.getS3PathStyleAccess().toString()); - - switch (storageConfig.getCredsVendingStrategy()) { - case KEYS_SAME_AS_CATALOG: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsCatalogAccessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsCatalogSecretAccessKey()); - break; - - case KEYS_DEDICATED_TO_CLIENT: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsClientAccessKeyId()); - 
propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsClientSecretAccessKey()); - break; - - case TOKEN_WITH_ASSUME_ROLE: - if (this.stsClient == null) { - createStsClient(storageConfig); - } - LOGGER.debug("S3Compatible - assumeRole !"); - AssumeRoleResponse response = - stsClient.assumeRole( - AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); - - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); - propertiesMap.put( - PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); - break; - - // @TODO implement the MinIO external OpenID Connect - - // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 - // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: - // break; - } - - return propertiesMap; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java deleted file mode 100644 index c66deeff7d..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import java.util.List; -import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ -public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { - - // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. 
- @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; - private @NotNull CredsVendingStrategyEnum credsVendingStrategy; - private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; - private @NotNull String s3endpoint; - private @NotNull Boolean s3pathStyleAccess; - private @NotNull String s3CredentialsCatalogAccessKeyId; - private @NotNull String s3CredentialsCatalogSecretAccessKey; - private @Nullable String s3CredentialsClientAccessKeyId; - private @Nullable String s3CredentialsClientSecretAccessKey; - - // Define how and what the catalog client will receive as credentials - public static enum CredsVendingStrategyEnum { - KEYS_SAME_AS_CATALOG, - KEYS_DEDICATED_TO_CLIENT, - TOKEN_WITH_ASSUME_ROLE; - }; - - // Define how the access and secret keys will be receive during the catalo creation, if - // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, - // but less dynamic - public static enum CredsCatalogAndClientStrategyEnum { - VALUE, - ENV_VAR_NAME; - }; - - // Constructor - @JsonCreator - public S3StorageConfigurationInfo( - @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, - @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull - CredsVendingStrategyEnum credsVendingStrategy, - @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull - CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, - @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull - String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull - String s3CredentialsCatalogSecretAccessKey, - @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable - String s3CredentialsClientAccessKeyId, - @JsonProperty(value = 
"s3CredentialsClientSecretAccessKey", required = false) @Nullable - String s3CredentialsClientSecretAccessKey, - @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull - Boolean s3PathStyleAccess, - @JsonProperty(value = "allowedLocations", required = true) @NotNull - List allowedLocations) { - - // Classic super and constructor stuff storing data in private internal properties - super(storageType, allowedLocations); - validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); - this.credsVendingStrategy = - CredsVendingStrategyEnum.valueOf( - CredsVendingStrategyEnum.class, credsVendingStrategy.name()); - this.credsCatalogAndClientStrategy = - CredsCatalogAndClientStrategyEnum.valueOf( - CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name()); - this.s3pathStyleAccess = s3PathStyleAccess; - this.s3endpoint = s3Endpoint; - - // The constructor is called multiple time during catalog life - // to do substitution only once, there is a basic if null test, otherwise affect the data from - // the "Polaris cache storage" - // this way the first time the value is retrived from the name of the variable - // next time the getenv will try to retrive a variable but is using the value as a nome, it will - // be null, we affect the value provided by "Polaris cache storage" - if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) { - String cai = System.getenv(s3CredentialsCatalogAccessKeyId); - String cas = System.getenv(s3CredentialsCatalogSecretAccessKey); - String cli = System.getenv(s3CredentialsClientAccessKeyId); - String cls = System.getenv(s3CredentialsClientSecretAccessKey); - this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = - (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = (cli != null) ? 
cli : s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = - (cls != null) ? cls : s3CredentialsClientSecretAccessKey; - } else { - this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; - } - } - - public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { - return this.credsVendingStrategy; - } - - public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { - return this.credsCatalogAndClientStrategy; - } - - public @NotNull String getS3Endpoint() { - return this.s3endpoint; - } - - public @NotNull Boolean getS3PathStyleAccess() { - return this.s3pathStyleAccess; - } - - public @NotNull String getS3CredentialsCatalogAccessKeyId() { - return this.s3CredentialsCatalogAccessKeyId; - } - - public @NotNull String getS3CredentialsCatalogSecretAccessKey() { - return this.s3CredentialsCatalogSecretAccessKey; - } - - public @Nullable String getS3CredentialsClientAccessKeyId() { - return this.s3CredentialsClientAccessKeyId; - } - - public @Nullable String getS3CredentialsClientSecretAccessKey() { - return this.s3CredentialsClientSecretAccessKey; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("storageType", getStorageType()) - .add("storageType", getStorageType().name()) - .add("allowedLocation", getAllowedLocations()) - .toString(); - } - - @Override - public String getFileIoImplClassName() { - return "org.apache.iceberg.aws.s3.S3FileIO"; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java new file mode 100644 index 
0000000000..3dfb03814d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; + +import jakarta.annotation.Nonnull; +import java.net.URI; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.apache.polaris.core.PolarisConfigurationStore; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.apache.polaris.core.storage.StorageUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; +import 
software.amazon.awssdk.policybuilder.iam.IamEffect; +import software.amazon.awssdk.policybuilder.iam.IamPolicy; +import software.amazon.awssdk.policybuilder.iam.IamResource; +import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** S3 compatible implementation of PolarisStorageIntegration */ +public class S3CompatibleCredentialsStorageIntegration + extends InMemoryStorageIntegration { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); + private final PolarisConfigurationStore configurationStore; + + public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { + super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); + this.configurationStore = configurationStore; + } + + /** {@inheritDoc} */ + @Override + public EnumMap getSubscopedCreds( + @Nonnull RealmContext realmContext, + @Nonnull PolarisDiagnostics diagnostics, + @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + boolean allowListOperation, + @Nonnull Set allowedReadLocations, + @Nonnull Set allowedWriteLocations) { + + StsClient stsClient; + String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); + String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); + + EnumMap propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + if (storageConfig.getS3Region() != null) { + propertiesMap.put(PolarisCredentialProperty.CLIENT_REGION, 
storageConfig.getS3Region()); + } + + LOGGER.debug("S3Compatible - createStsClient()"); + try { + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (caI != null && caS != null) { + // else default provider build credentials from profile or standard AWS env var + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug( + "S3Compatible - stsClient using keys from catalog settings - overiding default constructor"); + } + stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder() + .roleSessionName("PolarisCredentialsSTS") + .roleArn( + (storageConfig.getS3RoleArn() == null) ? "" : storageConfig.getS3RoleArn()) + .policy( + policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + .toJson()) + .durationSeconds( + configurationStore.getConfiguration( + realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + LOGGER.debug( + "S3Compatible - assumeRole - Token Expiration at : {}", + response.credentials().expiration().toString()); + + } catch (Exception e) { + System.err.println("S3Compatible - stsClient - build failure : " + e.getMessage()); + } + + return propertiesMap; + } + + /* + * function from AwsCredentialsStorageIntegration but without roleArn parameter + */ + private IamPolicy policyString( + boolean allowList, Set readLocations, Set writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder 
allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map bucketListStatementBuilder = new HashMap<>(); + Map bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = "arn:aws:s3:::"; + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + 
IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = StorageUtil.getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", bucket, path); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java new file mode 100644 index 0000000000..776279546a --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * S3-Compatible Storage Configuration. This class holds the parameters needed to connect to + * S3-compatible storage services such as MinIO, Ceph, Dell ECS, etc. + */ +public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private final @NotNull String s3Endpoint; + private final @Nullable String s3CredentialsCatalogAccessKeyId; + private final @Nullable String s3CredentialsCatalogSecretAccessKey; + private final @NotNull Boolean s3PathStyleAccess; + private final @Nullable String s3Region; + private final @Nullable String s3RoleArn; + + @JsonCreator + public S3CompatibleStorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + Boolean s3PathStyleAccess, + 
@JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, + @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, + @JsonProperty(value = "allowedLocations", required = true) @Nullable + List allowedLocations) { + + super(StorageType.S3_COMPATIBLE, allowedLocations); + validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); + this.s3PathStyleAccess = s3PathStyleAccess; + this.s3Endpoint = s3Endpoint; + this.s3CredentialsCatalogAccessKeyId = + (s3CredentialsCatalogAccessKeyId == null) ? "" : s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = + (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey; + this.s3Region = s3Region; + this.s3RoleArn = s3RoleArn; + } + + public @NotNull String getS3Endpoint() { + return this.s3Endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3PathStyleAccess; + } + + public @Nullable String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @Nullable String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3RoleArn() { + return this.s3RoleArn; + } + + public @Nullable String getS3Region() { + return this.s3Region; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .add("s3Region", getS3Region()) + .add("s3RoleArn", getS3RoleArn()) + .add("s3PathStyleAccess", getS3PathStyleAccess()) + .add("s3Endpoint", getS3Endpoint()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/quarkus/defaults/src/main/resources/application-it.properties b/quarkus/defaults/src/main/resources/application-it.properties index 5f46d203f2..e4ad1a6e02 100644 --- 
a/quarkus/defaults/src/main/resources/application-it.properties +++ b/quarkus/defaults/src/main/resources/application-it.properties @@ -35,7 +35,7 @@ polaris.features.defaults."ALLOW_WILDCARD_LOCATION"=true polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=true polaris.features.defaults."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_it"=true polaris.features.defaults."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","GCS","AZURE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","S3_COMPATIBLE","GCS","AZURE"] polaris.realm-context.realms=POLARIS,OTHER diff --git a/quarkus/defaults/src/main/resources/application.properties b/quarkus/defaults/src/main/resources/application.properties index d3a2057372..9bc6cc03e7 100644 --- a/quarkus/defaults/src/main/resources/application.properties +++ b/quarkus/defaults/src/main/resources/application.properties @@ -90,7 +90,7 @@ polaris.realm-context.header-name=Polaris-Realm polaris.realm-context.require-header=false polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=false -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","GCS","AZURE","FILE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","S3_COMPATIBLE","GCS","AZURE","FILE"] # realm overrides # polaris.features.realm-overrides."my-realm"."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_TEST"=true # polaris.features.realm-overrides."my-realm"."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md index 08089f56f4..afa54e0b2a 100644 --- a/regtests/minio/Readme.md +++ b/regtests/minio/Readme.md @@ -18,22 +18,21 @@ --> # MiniIO Secured -## Minio and secured buckets with TLS self-signed / custom AC +## Minio and secured buckets with TLS self-signed / custom Certificate Authority -To be able to test Polaris with buckets in TLS under custom AC or 
self-signed certificate +To be able to test Polaris with buckets in TLS under custom Certificate Authority or self-signed certificate ## MiniIO generate self-signed certificates designed for docker-compose setup - Download minio certificate generator : https://github.com/minio/certgen -- ```./certgen -host "localhost,minio,*"``` -- put them in ./certs and ./certs/CAs -- they will be mounted in default minio container placeholder +- Generate certifications: ```./certgen -host "localhost,minio,*"``` +- put them in ./certs and ./certs/CAs. They will be mounted in the default MinIO container placeholder. ## Test minIO secured TLS buckets from self-signed certificate with AWS CLI - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` -## add to java cacerts only the public.crt as an AC +## add to java cacerts only the public.crt as an Certificate Authority - ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` - ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml index b61ca65370..ff6a5c0a72 100644 --- a/regtests/minio/docker-compose.yml +++ b/regtests/minio/docker-compose.yml @@ -54,14 +54,10 @@ services: until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc rm -r --force --quiet minio/warehouse; /usr/bin/mc mb --ignore-existing minio/warehouse; - /usr/bin/mc policy set readwrite minio/warehouse; /usr/bin/mc rm -r --force --quiet minio/warehouse2; /usr/bin/mc mb --ignore-existing minio/warehouse2; - /usr/bin/mc policy set readwrite minio/warehouse2; /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; - /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; - /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; tail -f /dev/null " networks: diff --git a/regtests/minio/miniodata/Readme.md b/regtests/minio/miniodata/Readme.md new file mode 100644 index 0000000000..d65c6f4723 --- /dev/null +++ b/regtests/minio/miniodata/Readme.md @@ -0,0 +1 @@ +# Folder for MinIO data container volume diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index fc16fa5422..172488b7b2 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -47,7 +47,6 @@ if [ $# -ne 0 ] && [ $# -ne 1 ]; then fi # Init -SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}" REGTEST_HOME=$(dirname $(realpath $0)) cd ${REGTEST_HOME} @@ -65,6 +64,20 @@ fi S3_LOCATION_2="s3://warehouse2/polaris/" +# SPARK_BEARER_TOKEN +if ! 
output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi +SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') +if [ "SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi # check if Polaris is running polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs --output /dev/null) @@ -73,6 +86,7 @@ if [ $polaris_http_code -eq 000 ] && [ $polaris_http_code -ne 200 ]; then exit 1 fi + # check if cacerts contain MinIO certificate cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry) echo $cert_response @@ -108,14 +122,15 @@ fi # creation of catalog +echo """ +These environnement variables have to be available to Polaris service : +CATALOG_S3_KEY_ID = minio-user-catalog +CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +export CATALOG_S3_KEY_ID=minio-user-catalog +export CATALOG_S3_KEY_SECRET=12345678-minio-catalog +""" -# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environnement variables have to be available to Polaris -# CATALOG_ID=minio-user-catalog -# CATALOG_SECRET=12345678-minio-catalog -# CLIENT_ID=minio-user-client -# CLIENT_SECRET=12345678-minio-client - -echo -e "\n----\nCREATE Catalog\n" +echo -e "\n----\nCREATE Catalog with few parameters \n" response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ @@ -130,18 +145,12 @@ response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H " }, 
\"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\"], - \"s3.path-style-access\": true, - \"s3.endpoint\": \"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.endpoint\": \"https://localhost:9000\" } }" ) + echo -e "Catalog creation - response API http code : $response_catalog \n" if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then echo "Problem during catalog creation" @@ -149,16 +158,14 @@ if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then fi - - echo -e "Get the catalog created : \n" curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark -# Try to update the catalog, - adding a second bucket in the alllowed locations -echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the alllowed locations\n" +# Update the catalog +echo -e "\n----\nUPDATE the catalog v1, - adding a second bucket in the alllowed locations\n" curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ @@ -170,26 +177,17 @@ curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ }, \"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], - \"s3.path-style-access\": true, \"s3.endpoint\": 
\"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.region\": \"region-1\", + \"s3.pathStyleAccess\": true, + \"s3.credentials.catalog.accessKeyEnvVar\": \"CATALOG_S3_KEY_ID\", + \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"CATALOG_S3_KEY_SECRET\", + \"s3.roleArn\": \"arn:xxx:xxx:xxx:xxxx\" } }" -echo -e "Get the catalog updated with second allowed location : \n" -curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ - -H 'Accept: application/json' \ - -H 'Content-Type: application/json' \ - http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark - - echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ @@ -212,9 +210,9 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" + echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" echo ":-)" echo "" -docker-compose --progress quiet --project-name minio --project-directory minio/ -f minio/docker-compose.yml down - +docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java 
b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 80b0729d24..9c3aeedb1b 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,7 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; -import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleCredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -75,7 +75,9 @@ public PolarisStorageIntegrationProviderImpl( new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; case S3_COMPATIBLE: - storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + storageIntegration = + (PolarisStorageIntegration) + new S3CompatibleCredentialsStorageIntegration(configurationStore); break; case GCS: storageIntegration = diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index d4a1f44fb8..370a62dc0d 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -878,7 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" - S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3CompatibleStorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -907,57 +907,38 @@ components: required: - roleArn - S3StorageConfigInfo: + S3CompatibleStorageConfigInfo: type: 
object - description: S3 compatible storage configuration info (MinIO, Dell ECS, Netapp StorageGRID, ...) + description: s3-compatible storage configuration info (MinIO, Ceph, Dell ECS, Netapp StorageGRID, ...) allOf: - $ref: '#/components/schemas/StorageConfigInfo' properties: - credsCatalogAndClientStrategy: - type: string - enum: - - VALUE - - ENV_VAR_NAME - default: ENV_VAR_NAME - example: "ACCESS_KEY" - description: When you send key VALUE directly via this command, they should apear in logs. By ENV_VAR_NAME without dollar, only a reference will appear in logs, but the value have to be available as environnement variable in the context where Polaris is running - credsVendingStrategy: - type: string - enum: - - TOKEN_WITH_ASSUME_ROLE - - KEYS_SAME_AS_CATALOG - - KEYS_DEDICATED_TO_CLIENT - default: TOKEN_WITH_ASSUME_ROLE - description: The catalog strategy to vend credentials to client. Options possible are same keys than catalog, keys dedicated to clients, or Tokens with STS methods 'assumeRole' for Dell ECS or NetApp StorageGrid solution, 'truc' for MinIo solution) - s3.path-style-access: - type: boolean - description: if true use path style - default: false s3.endpoint: type: string description: the S3 endpoint example: "http[s]://host:port" - s3.credentials.catalog.access-key-id: + s3.credentials.catalog.accessKeyEnvVar: type: string - description: The ACCESS_KEY_ID used y the catalog to communicate with S3 - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.catalog.secret-access-key: + description: Default to AWS credentials, otherwise set the environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" + s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: The SECRET_ACCESS_KEY used y the catalog to communicate with S3 - example: "$AWS_SECRET_ACCESS_KEY" - s3.credentials.client.access-key-id: + description: Default to AWS credentials, 
otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" + s3.pathStyleAccess: + type: boolean + description: Whether or not to use path-style access + default: false + s3.region: type: string - description: Optional - ACCESS_KEY_ID vended by catalog to the client in case of this CredentialVendedStrategy is selected - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.client.secret-access-key: + description: Optional - the s3 region where data is stored + example: "rack-1 or us-east-1" + s3.roleArn: type: string - description: Optional - SECRET_ACCESS_KEY vended by catalog to the client in case of this CredentialVendedStrategy is selected - example: "$AWS_SECRET_ACCESS_KEY" + description: Optional - a s3 role arn + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" required: - - credsVendingStrategy - s3.endpoint - - s3.credentials.catalog.access-key-id - - s3.credentials.catalog.secret-access-key AzureStorageConfigInfo: type: object From 0327f1519e3d246fe48cb597a219efc513d0bdd0 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Thu, 27 Feb 2025 19:55:21 +0100 Subject: [PATCH 03/17] add s3 profile --- .../polaris/core/entity/CatalogEntity.java | 2 + .../polaris/core/storage/StorageUtil.java | 137 ++++++++++++++++ ...mpatibleCredentialsStorageIntegration.java | 148 ++++-------------- .../S3CompatibleStorageConfigurationInfo.java | 19 ++- regtests/minio/queries-for-spark.sql | 12 +- regtests/run_spark_sql_s3compatible.sh | 140 +++++++++-------- spec/polaris-management-service.yml | 19 ++- 7 files changed, 271 insertions(+), 206 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index ab70b9b497..d5ad54771f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ 
b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -149,6 +149,7 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3ProfileName(s3Config.getS3ProfileName()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) .setAllowedLocations(s3Config.getAllowedLocations()) .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) @@ -275,6 +276,7 @@ public Builder setStorageConfigurationInfo( new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, s3ConfigModel.getS3Endpoint(), + s3ConfigModel.getS3ProfileName(), s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java index 02cc2af126..6eb26a94df 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java @@ -20,6 +20,11 @@ import jakarta.annotation.Nonnull; import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import software.amazon.awssdk.policybuilder.iam.*; public class StorageUtil { /** @@ -62,4 +67,136 @@ public class StorageUtil { public static @Nonnull String getBucket(URI uri) { return uri.getAuthority(); } + + /** + * Given a path, return it without leading slash + * + * @param path A path to parse + * @return Same path without leading slash + */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } + + 
/** + * Given an uri, and format an S3 path + * + * @param uri A path to parse + * @return A bucket and a path joined by slash + */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", bucket, path); + } + + /** + * Given a roleArn, return the prefix + * + * @param roleArn A roleArn to parse + * @return The prefix of the roleArn + */ + private static String getArnPrefixFor(String roleArn) { + if (roleArn.contains("aws-cn")) { + return "arn:aws-cn:s3:::"; + } else if (roleArn.contains("aws-us-gov")) { + return "arn:aws-us-gov:s3:::"; + } else { + return "arn:aws:s3:::"; + } + } + + /** + * generate an IamPolicy from the input readLocations and writeLocations, optionally with list + * support. Credentials will be scoped to exactly the resources provided. If read and write + * locations are empty, a non-empty policy will be generated that grants GetObject and optionally + * ListBucket privileges with no resources. This prevents us from sending an empty policy to AWS + * and just assuming the role with full privileges. 
+ * + * @param roleArn A roleArn + * @param allowList Allow list or not + * @param readLocations A list of input read locations + * @param writeLocations A list of input write locations + * @return A policy limiting scope access + */ + // TODO - add KMS key access + public static IamPolicy policyString( + String roleArn, boolean allowList, Set readLocations, Set writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map bucketListStatementBuilder = new HashMap<>(); + Map bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = getArnPrefixFor(roleArn); + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + // TODO add support for CN and GOV + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + // TODO add support 
for CN and GOV + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 3dfb03814d..b1aebb4afd 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -21,12 +21,10 @@ import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import jakarta.annotation.Nonnull; +import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; -import java.util.HashMap; -import java.util.Map; import java.util.Set; -import java.util.stream.Stream; import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; import org.apache.polaris.core.context.RealmContext; @@ -36,12 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; -import software.amazon.awssdk.policybuilder.iam.IamEffect; -import software.amazon.awssdk.policybuilder.iam.IamPolicy; -import software.amazon.awssdk.policybuilder.iam.IamResource; -import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.profiles.ProfileFileSupplier; import software.amazon.awssdk.services.sts.StsClient; import software.amazon.awssdk.services.sts.StsClientBuilder; import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; @@ -60,7 +55,6 @@ public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore confi this.configurationStore = configurationStore; } - /** {@inheritDoc} */ @Override public EnumMap getSubscopedCreds( @Nonnull RealmContext realmContext, @@ -70,7 +64,6 @@ public EnumMap getSubscopedCreds( @Nonnull Set allowedReadLocations, @Nonnull Set allowedWriteLocations) { - StsClient stsClient; String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -85,136 +78,51 @@ public EnumMap getSubscopedCreds( } LOGGER.debug("S3Compatible - createStsClient()"); - try { - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); - if (caI != null && caS != null) { - // else default provider build credentials from profile or standard AWS env var - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); - LOGGER.debug( - "S3Compatible - stsClient using keys from catalog settings - overiding default constructor"); - } - stsClient = stsBuilder.build(); + 
StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (storageConfig.getS3ProfileName() != null) { + stsBuilder.credentialsProvider( + ProfileCredentialsProvider.builder() + .profileFile(ProfileFileSupplier.defaultSupplier()) + .profileName(storageConfig.getS3ProfileName()) + .build()); + LOGGER.debug("S3Compatible - stsClient using profile from catalog settings"); + } else if (caI != null && caS != null) { + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug("S3Compatible - stsClient using keys from catalog settings"); + } + try (StsClient stsClient = stsBuilder.build()) { LOGGER.debug("S3Compatible - stsClient successfully built"); AssumeRoleResponse response = stsClient.assumeRole( AssumeRoleRequest.builder() .roleSessionName("PolarisCredentialsSTS") - .roleArn( - (storageConfig.getS3RoleArn() == null) ? 
"" : storageConfig.getS3RoleArn()) + .roleArn(storageConfig.getS3RoleArn()) .policy( - policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + StorageUtil.policyString( + storageConfig.getS3RoleArn(), + allowListOperation, + allowedReadLocations, + allowedWriteLocations) .toJson()) .durationSeconds( configurationStore.getConfiguration( realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); propertiesMap.put( PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); LOGGER.debug( - "S3Compatible - assumeRole - Token Expiration at : {}", + "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); - } catch (Exception e) { - System.err.println("S3Compatible - stsClient - build failure : " + e.getMessage()); + throw new NotAuthorizedException( + "Unable to build S3 Security Token Service client - " + e.getMessage()); } return propertiesMap; } - - /* - * function from AwsCredentialsStorageIntegration but without roleArn parameter - */ - private IamPolicy policyString( - boolean allowList, Set readLocations, Set writeLocations) { - IamPolicy.Builder policyBuilder = IamPolicy.builder(); - IamStatement.Builder allowGetObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetObject") - .addAction("s3:GetObjectVersion"); - Map bucketListStatementBuilder = new HashMap<>(); - Map bucketGetLocationStatementBuilder = new HashMap<>(); - - String arnPrefix = "arn:aws:s3:::"; - Stream.concat(readLocations.stream(), writeLocations.stream()) - .distinct() - .forEach( - location -> { - URI uri = URI.create(location); - allowGetObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), 
"*", "/"))); - final var bucket = arnPrefix + StorageUtil.getBucket(uri); - if (allowList) { - bucketListStatementBuilder - .computeIfAbsent( - bucket, - (String key) -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:ListBucket") - .addResource(key)) - .addCondition( - IamConditionOperator.STRING_LIKE, - "s3:prefix", - StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); - } - bucketGetLocationStatementBuilder.computeIfAbsent( - bucket, - key -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetBucketLocation") - .addResource(key)); - }); - - if (!writeLocations.isEmpty()) { - IamStatement.Builder allowPutObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:PutObject") - .addAction("s3:DeleteObject"); - writeLocations.forEach( - location -> { - URI uri = URI.create(location); - allowPutObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); - }); - policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); - } - if (!bucketListStatementBuilder.isEmpty()) { - bucketListStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - } else if (allowList) { - // add list privilege with 0 resources - policyBuilder.addStatement( - IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); - } - - bucketGetLocationStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); - } - - /* function from AwsCredentialsStorageIntegration */ - private static @Nonnull String parseS3Path(URI uri) { - String bucket = StorageUtil.getBucket(uri); - String path = trimLeadingSlash(uri.getPath()); - return String.join("/", bucket, path); - } - - /* function from 
AwsCredentialsStorageIntegration */ - private static @Nonnull String trimLeadingSlash(String path) { - if (path.startsWith("/")) { - path = path.substring(1); - } - return path; - } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 776279546a..76fe11008c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; +import jakarta.annotation.Nonnull; import java.util.List; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.jetbrains.annotations.NotNull; @@ -34,9 +35,10 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. + // for allowed read and write locations for sub-scoping credentials. 
@JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; private final @NotNull String s3Endpoint; + private final @Nullable String s3ProfileName; private final @Nullable String s3CredentialsCatalogAccessKeyId; private final @Nullable String s3CredentialsCatalogSecretAccessKey; private final @NotNull Boolean s3PathStyleAccess; @@ -47,33 +49,39 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigur public S3CompatibleStorageConfigurationInfo( @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable String s3CredentialsCatalogSecretAccessKey, @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull Boolean s3PathStyleAccess, @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, - @JsonProperty(value = "allowedLocations", required = true) @Nullable + @JsonProperty(value = "allowedLocations", required = true) @Nonnull List allowedLocations) { super(StorageType.S3_COMPATIBLE, allowedLocations); validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); this.s3PathStyleAccess = s3PathStyleAccess; this.s3Endpoint = s3Endpoint; + this.s3ProfileName = s3ProfileName; this.s3CredentialsCatalogAccessKeyId = (s3CredentialsCatalogAccessKeyId == null) ? 
"" : s3CredentialsCatalogAccessKeyId; this.s3CredentialsCatalogSecretAccessKey = (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey; this.s3Region = s3Region; - this.s3RoleArn = s3RoleArn; + this.s3RoleArn = (s3RoleArn == null) ? "" : s3RoleArn; } public @NotNull String getS3Endpoint() { return this.s3Endpoint; } + public @Nullable String getS3ProfileName() { + return this.s3ProfileName; + } + public @NotNull Boolean getS3PathStyleAccess() { return this.s3PathStyleAccess; } @@ -103,6 +111,7 @@ public String toString() { .add("s3RoleArn", getS3RoleArn()) .add("s3PathStyleAccess", getS3PathStyleAccess()) .add("s3Endpoint", getS3Endpoint()) + .add("s3ProfileName", getS3ProfileName()) .toString(); } diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql index 966ea6db62..0932af1ee7 100644 --- a/regtests/minio/queries-for-spark.sql +++ b/regtests/minio/queries-for-spark.sql @@ -29,14 +29,10 @@ SELECT * FROM db1.ns2.view1; INSERT INTO db1.ns1.table1 VALUES (13, 23); SELECT * FROM db1.ns2.view1; -CREATE DATABASE IF NOT EXISTS db1; -CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); -INSERT INTO db1.ns1.table1 VALUES (3, 2); - -- Test the second bucket allowed in the catalog -CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; -CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); -INSERT INTO db2.table1 VALUES (01, 02); -SELECT * FROM db2.table1; +CREATE DATABASE IF NOT EXISTS wh2 LOCATION 's3://warehouse2/polaris'; +CREATE OR REPLACE TABLE wh2.table1 ( f1 int, f2 int ); +INSERT INTO wh2.table1 VALUES (01, 02); +SELECT * FROM wh2.table1; quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index 172488b7b2..ebd490b58f 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -21,7 +21,7 @@ # Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. 
# ----------------------------------------------------------------------------- # -# Prequisite: +# Requisite: # This script use a MinIO with TLS. # Please follow instructions in regtests/minio/Readme.md and update your # java cacerts with self-signed certificate @@ -40,6 +40,7 @@ clear + if [ $# -ne 0 ] && [ $# -ne 1 ]; then echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" echo "Usage: ./run_spark_sql.sh [S3-location]" @@ -63,18 +64,20 @@ fi # Second location for testing catalog update S3_LOCATION_2="s3://warehouse2/polaris/" +# If Polaris run inMemory classic mode, principal credentials are : root:secret +# If Polaris run inMemory DEBUG mode, principal credentials are to retrieve from service log within this pattern: 522f251cc2b9c121:6eff0915385979684d575fa1d3f18e2b # SPARK_BEARER_TOKEN if ! output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=secret" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then echo "Error: Failed to retrieve bearer token" exit 1 fi SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') -if [ "SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then +if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then echo "Error: Failed to retrieve bearer token" exit 1 fi @@ -101,54 +104,54 @@ echo -e "\n\n-------\n\n" echo "Start a minio with secured self-signed buckets s3://warehouse and users, wait a moment please..." 
docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured -echo "minio brower is availaible during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" +echo "minio browser is available during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" echo -e "\n\n-------\n\n" # spark setup -export SPARK_VERSION=spark-3.5.2 -export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 - echo "Doing spark setup... wait a moment" +export SPARK_VERSION=spark-3.5.4 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding ./setup.sh > /dev/null 2>&1 - -if [ -z "${SPARK_HOME}"]; then +if [ -z "${SPARK_HOME}" ]; then export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) fi - - -# start of tests - -# creation of catalog - echo """ -These environnement variables have to be available to Polaris service : -CATALOG_S3_KEY_ID = minio-user-catalog -CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +These environment variables have to be available to Polaris service or as keys in the aws profile, and the name of this profile provided to the catalog as parameter : export CATALOG_S3_KEY_ID=minio-user-catalog export CATALOG_S3_KEY_SECRET=12345678-minio-catalog """ - -echo -e "\n----\nCREATE Catalog with few parameters \n" +echo Add minio-catalog-1 section in aws profile +cat >>~/.aws/credentials < /dev/stderr -echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +echo -e "\n\n---- Assign the catalog_admin to the service_admin.\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ -d '{"name": "catalog_admin"}' > /dev/stderr -echo -e 
"\n----\nStart Spark-sql to test Polaris catalog with queries\n" +echo -e "\n\n---- Start Spark-sql to test Polaris catalog with queries\n" ${SPARK_HOME}/bin/spark-sql --verbose \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ @@ -210,9 +214,13 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" +echo Remove minio-catalog-1 section from aws profile +sed -i '/\[minio-catalog-1\]/,${/\[minio-catalog-1\]/d; d}' ~/.aws/credentials +echo Done. + +echo +echo End of tests, a table and a view data with displayed should be visible in log above +echo Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata' +echo ':-)' -echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" -echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" -echo ":-)" -echo "" -docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down +docker-compose --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down \ No newline at end of file diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 370a62dc0d..2819b802c8 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -915,28 +915,33 @@ components: properties: s3.endpoint: type: string - description: the S3 endpoint + description: S3 endpoint example: "http[s]://host:port" + s3.profileName: + type: string + description: optional - S3 profile name (credentials) used by this catalog to communicate with S3 + example: "default or minio-storage-catalog-1 or ceph-storage-catalog-2" s3.credentials.catalog.accessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the 
environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'ACCESS_KEY_ID' used by this catalog to communicate with S3 example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'SECRET_ACCESS_KEY' used by this catalog to communicate with S3 example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" s3.pathStyleAccess: type: boolean - description: Whether or not to use path-style access + description: optional - whether or not to use path-style access default: false s3.region: type: string - description: Optional - the s3 region where data is stored + description: optional - s3 region where data is stored example: "rack-1 or us-east-1" s3.roleArn: type: string - description: Optional - a s3 role arn - example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" + description: optional - s3 role arn, used with assumeRole to obtain a Security Token Service + pattern: '^([u|a]rn:\S*:\S*:\S*:\S*:\S*).*$' + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234 or urn:ecs:iam::namespace:user/role" required: - s3.endpoint From 1fa3a97f3fe5870b5c81bffa98a86636dbe3f3b3 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Tue, 4 Mar 2025 22:35:37 +0100 Subject: [PATCH 04/17] rebase --- ...mpatibleCredentialsStorageIntegration.java | 24 +++++++------------ ...PolarisStorageIntegrationProviderImpl.java | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java 
b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index b1aebb4afd..02b6943117 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -19,18 +19,17 @@ package org.apache.polaris.core.storage.s3compatible; import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; +import static org.apache.polaris.core.PolarisConfiguration.loadConfig; -import jakarta.annotation.Nonnull; import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; import java.util.Set; -import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.storage.InMemoryStorageIntegration; import org.apache.polaris.core.storage.PolarisCredentialProperty; import org.apache.polaris.core.storage.StorageUtil; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -48,21 +47,18 @@ public class S3CompatibleCredentialsStorageIntegration private static final Logger LOGGER = LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); - private final PolarisConfigurationStore configurationStore; - public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { - super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); - this.configurationStore = configurationStore; + public S3CompatibleCredentialsStorageIntegration() { + super(S3CompatibleCredentialsStorageIntegration.class.getName()); } @Override public EnumMap getSubscopedCreds( - @Nonnull RealmContext 
realmContext, - @Nonnull PolarisDiagnostics diagnostics, - @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3CompatibleStorageConfigurationInfo storageConfig, boolean allowListOperation, - @Nonnull Set allowedReadLocations, - @Nonnull Set allowedWriteLocations) { + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -106,9 +102,7 @@ public EnumMap getSubscopedCreds( allowedReadLocations, allowedWriteLocations) .toJson()) - .durationSeconds( - configurationStore.getConfiguration( - realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 9c3aeedb1b..dff9037863 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -77,7 +77,7 @@ public PolarisStorageIntegrationProviderImpl( case S3_COMPATIBLE: storageIntegration = (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(configurationStore); + new S3CompatibleCredentialsStorageIntegration(); break; case GCS: storageIntegration = From bd903043b6b79a4f519d99c0240a768087093e37 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 00:05:03 +0100 Subject: [PATCH 05/17] format --- .../service/storage/PolarisStorageIntegrationProviderImpl.java | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index dff9037863..3f7247b483 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -76,8 +76,7 @@ public PolarisStorageIntegrationProviderImpl( break; case S3_COMPATIBLE: storageIntegration = - (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(); + (PolarisStorageIntegration) new S3CompatibleCredentialsStorageIntegration(); break; case GCS: storageIntegration = From d41da3d732afa41142ee5df164d5a855380d1179 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 21 Oct 2024 21:56:39 +0200 Subject: [PATCH 06/17] add s3 compatible storage - first commit --- .gitignore | 4 + build.gradle.kts | 3 + helm/polaris/values.yaml | 273 +++++------------- .../polaris/core/PolarisConfiguration.java | 1 + .../polaris/core/entity/CatalogEntity.java | 50 ++++ .../storage/PolarisCredentialProperty.java | 2 + .../PolarisStorageConfigurationInfo.java | 3 + .../s3/S3CredentialsStorageIntegration.java | 138 +++++++++ .../s3/S3StorageConfigurationInfo.java | 164 +++++++++++ regtests/minio/Readme.md | 42 +++ regtests/minio/certs/CAs/private.key | 5 + regtests/minio/certs/CAs/public.crt | 13 + regtests/minio/certs/private.key | 5 + regtests/minio/certs/public.crt | 13 + regtests/minio/docker-compose.yml | 69 +++++ regtests/minio/queries-for-spark.sql | 42 +++ regtests/run_spark_sql_s3compatible.sh | 220 ++++++++++++++ ...PolarisStorageIntegrationProviderImpl.java | 4 + spec/polaris-management-service.yml | 54 ++++ 19 files changed, 902 insertions(+), 203 deletions(-) create mode 100644 
polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 regtests/minio/Readme.md create mode 100644 regtests/minio/certs/CAs/private.key create mode 100644 regtests/minio/certs/CAs/public.crt create mode 100644 regtests/minio/certs/private.key create mode 100644 regtests/minio/certs/public.crt create mode 100644 regtests/minio/docker-compose.yml create mode 100644 regtests/minio/queries-for-spark.sql create mode 100755 regtests/run_spark_sql_s3compatible.sh diff --git a/.gitignore b/.gitignore index e220135f64..0092dccc2f 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ regtests/output/ # This file, if checked in after running for example regtests, contains unmanaged dependencies that eventually # cause unnecessary "security alerts" like https://github.com/apache/polaris/pull/718. regtests/client/python/poetry.lock +regtests/minio/miniodata/* # Python stuff (see note about poetry.lock above as well!) 
/poetry.lock @@ -64,6 +65,9 @@ gradle/wrapper/gradle-wrapper-*.sha256 *.ipr *.iws +# VScode +.vscode + # Gradle /.gradle /build-logic/.gradle diff --git a/build.gradle.kts b/build.gradle.kts index 45f20b59e9..02c24a4db2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -109,6 +109,9 @@ tasks.named("rat").configure { excludes.add("regtests/metastore_db/**") excludes.add("regtests/client/python/.openapi-generator/**") excludes.add("regtests/output/**") + excludes.add("regtests/minio/miniodata/**") + excludes.add("regtests/minio/**/*.crt") + excludes.add("regtests/minio/**/*.key") excludes.add("**/*.ipynb") excludes.add("**/*.iml") diff --git a/helm/polaris/values.yaml b/helm/polaris/values.yaml index 7713d8ca2c..a6d7f3180f 100644 --- a/helm/polaris/values.yaml +++ b/helm/polaris/values.yaml @@ -359,209 +359,76 @@ extraEnv: # name: aws-secret # key: secret_access_key -# -- Extra volumes to add to the polaris pod. See https://kubernetes.io/docs/concepts/storage/volumes/. -extraVolumes: [] - # - name: extra-volume - # emptyDir: {} - -# -- Extra volume mounts to add to the polaris container. See https://kubernetes.io/docs/concepts/storage/volumes/. -extraVolumeMounts: [] - # - name: extra-volume - # mountPath: /usr/share/extra-volume - -# -- Add additional init containers to the polaris pod(s) See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/. -extraInitContainers: [] - # - name: your-image-name - # image: your-image - # imagePullPolicy: Always - # command: ['sh', '-c', 'echo "hello world"'] - -tracing: - # -- Specifies whether tracing for the polaris server should be enabled. - enabled: false - # -- The collector endpoint URL to connect to (required). - # The endpoint URL must have either the http:// or the https:// scheme. - # The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port (by default 4317). - # See https://quarkus.io/guides/opentelemetry for more information. 
- endpoint: "http://otlp-collector:4317" - # -- Which requests should be sampled. Valid values are: "all", "none", or a ratio between 0.0 and - # "1.0d" (inclusive). E.g. "0.5d" means that 50% of the requests will be sampled. - # Note: avoid entering numbers here, always prefer a string representation of the ratio. - sample: "1.0d" - # -- Resource attributes to identify the polaris service among other tracing sources. - # See https://opentelemetry.io/docs/reference/specification/resource/semantic_conventions/#service. - # If left empty, traces will be attached to a service named "Apache Polaris"; to change this, - # provide a service.name attribute here. - attributes: - {} - # service.name: my-polaris - -metrics: - # -- Specifies whether metrics for the polaris server should be enabled. - enabled: true - # -- Additional tags (dimensional labels) to add to the metrics. - tags: - {} - # service: polaris - # environment: production - -serviceMonitor: - # -- Specifies whether a ServiceMonitor for Prometheus operator should be created. - enabled: true - # -- The scrape interval; leave empty to let Prometheus decide. Must be a valid duration, e.g. 1d, 1h30m, 5m, 10s. - interval: "" - # -- Labels for the created ServiceMonitor so that Prometheus operator can properly pick it up. - labels: - {} - # release: prometheus - # -- Relabeling rules to apply to metrics. Ref https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config. - metricRelabelings: [] - # - source_labels: [ __meta_kubernetes_namespace ] - # separator: ; - # regex: (.*) - # target_label: namespace - # replacement: $1 - # action: replace - -# -- Logging configuration. -logging: - # -- The log level of the root category, which is used as the default log level for all categories. - level: INFO - # -- The header name to use for the request ID. - requestIdHeaderName: Polaris-Request-Id - # -- Configuration for the console appender. 
- console: - # -- Whether to enable the console appender. - enabled: true - # -- The log level of the console appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- Configuration for the file appender. - file: - # -- Whether to enable the file appender. - enabled: false - # -- The log level of the file appender. - threshold: ALL - # -- Whether to log in JSON format. - json: false - # -- The log format to use. Ignored if JSON format is enabled. See - # https://quarkus.io/guides/logging#logging-format for details. - format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" - # -- The local directory where log files are stored. The persistent volume claim will be mounted - # here. - logsDir: /deployments/logs - # -- The log file name. - fileName: polaris.log - # -- Log rotation configuration. - rotation: - # -- The maximum size of the log file before it is rotated. Should be expressed as a Kubernetes quantity. - maxFileSize: 100Mi - # -- The maximum number of backup files to keep. - maxBackupIndex: 5 - # -- An optional suffix to append to the rotated log files. If present, the rotated log files - # will be grouped in time buckets, and each bucket will contain at most maxBackupIndex files. - # The suffix must be in a date-time format that is understood by DateTimeFormatter. If the - # suffix ends with .gz or .zip, the rotated files will also be compressed using the - # corresponding algorithm. - fileSuffix: ~ # .yyyy-MM-dd.gz - # -- The log storage configuration. A persistent volume claim will be created using these - # settings. 
- storage: - # -- The storage class name of the persistent volume claim to create. - className: standard - # -- The size of the persistent volume claim to create. - size: 512Gi - # -- Labels to add to the persistent volume claim spec selector; a persistent volume with - # matching labels must exist. Leave empty if using dynamic provisioning. - selectorLabels: {} - # app.kubernetes.io/name: polaris - # app.kubernetes.io/instance: RELEASE-NAME - # -- Configuration for specific log categories. - categories: - org.apache.polaris: INFO - org.apache.iceberg.rest: INFO - # Useful to debug configuration issues: - # io.smallrye.config: DEBUG - # -- Configuration for MDC (Mapped Diagnostic Context). Values specified here will be added to the - # log context of all incoming requests and can be used in log patterns. - mdc: - # aid=polaris - # sid=polaris-service - {} - -# -- Realm context resolver configuration. -realmContext: - # -- The type of realm context resolver to use. Two built-in types are supported: default and test; - # test is not recommended for production as it does not perform any realm validation. - type: default - # -- List of valid realms, for use with the default realm context resolver. The first realm in - # the list is the default realm. Realms not in this list will be rejected. - realms: - - POLARIS - -# -- Polaris features configuration. -features: - # -- Features to enable or disable globally. If a feature is not present in the map, the default - # built-in value is used. - defaults: {} - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - # SUPPORTED_CATALOG_STORAGE_TYPES: - # - S3 - # - GCS - # - AZURE - # - FILE - # -- Features to enable or disable per realm. This field is a map of maps. The realm name is the key, and the value is a map of - # feature names to values. If a feature is not present in the map, the default value from the 'defaults' field is used. 
- realmOverrides: {} - # my-realm: - # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: true - -# -- Polaris persistence configuration. -persistence: - # -- The type of persistence to use. Two built-in types are supported: in-memory and eclipse-link. - type: eclipse-link # in-memory - # -- The configuration for the eclipse-link persistence manager. - eclipseLink: - # -- The secret name to pull persistence.xml from. - secret: - # -- The name of the secret to pull persistence.xml from. - # If not provided, the default built-in persistence.xml will be used. This is probably not what you want. - name: ~ - # -- The key in the secret to pull persistence.xml from. - key: persistence.xml - # -- The persistence unit name to use. - persistenceUnit: polaris - -# -- Polaris FileIO configuration. -fileIo: - # -- The type of file IO to use. Two built-in types are supported: default and wasb. The wasb one translates WASB paths to ABFS ones. - type: default - -# -- Storage credentials for the server. If the following properties are unset, default -# credentials will be used, in which case the pod must have the necessary permissions to access the storage. -storage: - # -- The secret to pull storage credentials from. - secret: - # -- The name of the secret to pull storage credentials from. - name: ~ - # -- The key in the secret to pull the AWS access key ID from. Only required when using AWS. - awsAccessKeyId: ~ - # -- The key in the secret to pull the AWS secret access key from. Only required when using AWS. - awsSecretAccessKey: ~ - # -- The key in the secret to pull the GCP token from. Only required when using GCP. - gcpToken: ~ - # -- The key in the secret to pull the GCP token expiration time from. Only required when using GCP. Must be a valid ISO 8601 duration. The default is PT1H (1 hour). - gcpTokenLifespan: ~ - -# -- Polaris authentication configuration. -authentication: - # -- The type of authentication to use. 
Two built-in types are supported: default and test; - # test is not recommended for production. - authenticator: +# -- Configures whether to enable the bootstrap metastore manager job +bootstrapMetastoreManager: false + +# -- Extra environment variables to add to the bootstrap metastore manager job (see `extraEnv` for an example) +bootstrapExtraEnv: [] + +# -- The secret name to pull persistence.xml from (ensure the key name is 'persistence.xml') +persistenceConfigSecret: ~ + +# -- Configures for polaris-server.yml +polarisServerConfig: + server: + # Maximum number of threads. + maxThreads: 200 + + # Minimum number of thread to keep alive. + minThreads: 10 + applicationConnectors: + # HTTP-specific options. + - type: http + + # The port on which the HTTP server listens for service requests. + port: 8181 + + adminConnectors: + - type: http + port: 8182 + + # The hostname of the interface to which the HTTP server socket wil be found. If omitted, the + # socket will listen on all interfaces. + # bindHost: localhost + + # ssl: + # keyStore: ./example.keystore + # keyStorePassword: example + # + # keyStoreType: JKS # (optional, JKS is default) + + # HTTP request log settings + requestLog: + appenders: + # Settings for logging to stdout. + - type: console + + # # Settings for logging to a file. + # - type: file + + # # The file to which statements will be logged. + # currentLogFilename: ./logs/request.log + + # # When the log file rolls over, the file will be archived to requests-2012-03-15.log.gz, + # # requests.log will be truncated, and new statements written to it. + # archivedLogFilenamePattern: ./logs/requests-%d.log.gz + + # # The maximum number of log files to archive. 
+ # archivedFileCount: 14 + + # # Enable archiving if the request log entries go to the their own file + # archive: true + + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + + callContextResolver: type: default # -- The type of token service to use. Two built-in types are supported: default and test; # test is not recommended for production. diff --git a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java index ca1962e3c3..29b5424b8f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/PolarisConfiguration.java @@ -216,6 +216,7 @@ public static Builder builder() { .defaultValue( List.of( StorageConfigInfo.StorageTypeEnum.S3.name(), + StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE.name(), StorageConfigInfo.StorageTypeEnum.AZURE.name(), StorageConfigInfo.StorageTypeEnum.GCS.name(), StorageConfigInfo.StorageTypeEnum.FILE.name())) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f3bfd6edf0..f8a37dd6f7 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,12 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.S3StorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import 
org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; +import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -141,6 +143,32 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } + if (configInfo instanceof S3StorageConfigurationInfo) { + S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; + return S3StorageConfigInfo.builder() + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) + .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) + .setCredsVendingStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum + .valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsVendingStrategyEnum.class, + s3Config.getCredsVendingStrategy().name())) + .setCredsCatalogAndClientStrategy( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.valueOf( + org.apache.polaris.core.admin.model.S3StorageConfigInfo + .CredsCatalogAndClientStrategyEnum.class, + s3Config.getCredsCatalogAndClientStrategy().name())) + .setAllowedLocations(s3Config.getAllowedLocations()) + .setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId()) + .setS3CredentialsCatalogSecretAccessKey( + s3Config.getS3CredentialsCatalogSecretAccessKey()) + .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientSecretAccessKey()) + .setS3CredentialsClientSecretAccessKey(s3Config.getS3CredentialsClientAccessKeyId()) + .build(); + } if (configInfo instanceof AzureStorageConfigurationInfo) { 
AzureStorageConfigurationInfo azureConfig = (AzureStorageConfigurationInfo) configInfo; return AzureStorageConfigInfo.builder() @@ -250,6 +278,28 @@ public Builder setStorageConfigurationInfo( awsConfig.validateArn(awsConfigModel.getRoleArn()); config = awsConfig; break; + + case S3_COMPATIBLE: + S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel; + config = + new S3StorageConfigurationInfo( + PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, + S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf( + org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo + .CredsVendingStrategyEnum.class, + s3ConfigModel.getCredsVendingStrategy().name()), + S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf( + org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo + .CredsCatalogAndClientStrategyEnum.class, + s3ConfigModel.getCredsCatalogAndClientStrategy().name()), + s3ConfigModel.getS3Endpoint(), + s3ConfigModel.getS3CredentialsCatalogAccessKeyId(), + s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(), + s3ConfigModel.getS3CredentialsClientAccessKeyId(), + s3ConfigModel.getS3CredentialsClientSecretAccessKey(), + s3ConfigModel.getS3PathStyleAccess(), + new ArrayList<>(allowedLocations)); + break; case AZURE: AzureStorageConfigInfo azureConfigModel = (AzureStorageConfigInfo) storageConfigModel; AzureStorageConfigurationInfo azureConfigInfo = diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java index c79aaf595d..13838e6af9 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java @@ -23,6 +23,8 @@ public enum PolarisCredentialProperty { AWS_KEY_ID(String.class, "s3.access-key-id", "the aws access key id"), AWS_SECRET_KEY(String.class, 
"s3.secret-access-key", "the aws access key secret"), AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"), + AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"), + AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"), CLIENT_REGION( String.class, "client.region", "region to configure client for making requests to AWS"), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java index 6b0638e837..4f290e77ba 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java @@ -47,6 +47,7 @@ import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; +import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,6 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3StorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), @@ -241,6 +243,7 @@ public void validateMaxAllowedLocations(int maxAllowedLocations) { /** Polaris' storage type, each has a fixed prefix for its location */ public enum StorageType { S3("s3://"), + S3_COMPATIBLE("s3://"), AZURE(List.of("abfs://", "wasb://", "abfss://", "wasbs://")), GCS("gs://"), FILE("file://"), diff --git 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java new file mode 100644 index 0000000000..5fdbbdf37d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.storage.s3; + +import java.net.URI; +import java.util.EnumMap; +import java.util.Set; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** Credential vendor that supports generating */ +public class S3CredentialsStorageIntegration + extends InMemoryStorageIntegration { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); + + private StsClient stsClient; + + // Constructor + public S3CredentialsStorageIntegration() { + super(S3CredentialsStorageIntegration.class.getName()); + } + + public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { + + LOGGER.debug("S3Compatible - createStsClient()"); + + LOGGER.info( + "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); + + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + + stsBuilder.region( + Region + .US_WEST_1); // default region to avoid bug, because most (all?) 
S3 compatible softwares + // do not care about regions + stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create( + AwsBasicCredentials.create( + s3storageConfig.getS3CredentialsCatalogAccessKeyId(), + s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); + + this.stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + } + + /** {@inheritDoc} */ + @Override + public EnumMap getSubscopedCreds( + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3StorageConfigurationInfo storageConfig, + boolean allowListOperation, + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { + + LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); + + EnumMap propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + + switch (storageConfig.getCredsVendingStrategy()) { + case KEYS_SAME_AS_CATALOG: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsCatalogAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsCatalogSecretAccessKey()); + break; + + case KEYS_DEDICATED_TO_CLIENT: + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, + storageConfig.getS3CredentialsClientAccessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, + storageConfig.getS3CredentialsClientSecretAccessKey()); + break; + + case TOKEN_WITH_ASSUME_ROLE: + if (this.stsClient == null) { + createStsClient(storageConfig); + } + LOGGER.debug("S3Compatible - assumeRole !"); + AssumeRoleResponse response = + stsClient.assumeRole( + 
AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); + + propertiesMap.put( + PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put( + PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + break; + + // @TODO implement the MinIO external OpenID Connect - + // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 + // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: + // break; + } + + return propertiesMap; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java new file mode 100644 index 0000000000..c66deeff7d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.storage.s3; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ +public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private @NotNull CredsVendingStrategyEnum credsVendingStrategy; + private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; + private @NotNull String s3endpoint; + private @NotNull Boolean s3pathStyleAccess; + private @NotNull String s3CredentialsCatalogAccessKeyId; + private @NotNull String s3CredentialsCatalogSecretAccessKey; + private @Nullable String s3CredentialsClientAccessKeyId; + private @Nullable String s3CredentialsClientSecretAccessKey; + + // Define how and what the catalog client will receive as credentials + public static enum CredsVendingStrategyEnum { + KEYS_SAME_AS_CATALOG, + KEYS_DEDICATED_TO_CLIENT, + TOKEN_WITH_ASSUME_ROLE; + }; + + // Define how the access and secret keys will be receive during the catalo creation, if + // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, + // but less dynamic + public static enum CredsCatalogAndClientStrategyEnum { + VALUE, + ENV_VAR_NAME; + }; + + // Constructor + @JsonCreator + public S3StorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType 
storageType, + @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull + CredsVendingStrategyEnum credsVendingStrategy, + @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull + CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable + String s3CredentialsClientAccessKeyId, + @JsonProperty(value = "s3CredentialsClientSecretAccessKey", required = false) @Nullable + String s3CredentialsClientSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + Boolean s3PathStyleAccess, + @JsonProperty(value = "allowedLocations", required = true) @NotNull + List allowedLocations) { + + // Classic super and constructor stuff storing data in private internal properties + super(storageType, allowedLocations); + validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); + this.credsVendingStrategy = + CredsVendingStrategyEnum.valueOf( + CredsVendingStrategyEnum.class, credsVendingStrategy.name()); + this.credsCatalogAndClientStrategy = + CredsCatalogAndClientStrategyEnum.valueOf( + CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name()); + this.s3pathStyleAccess = s3PathStyleAccess; + this.s3endpoint = s3Endpoint; + + // The constructor is called multiple time during catalog life + // to do substitution only once, there is a basic if null test, otherwise affect the data from + // the "Polaris cache storage" + // this way the first time the value is retrived from the name of the variable + // next time the getenv will try to retrive a variable but is using the 
value as a nome, it will + // be null, we affect the value provided by "Polaris cache storage" + if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) { + String cai = System.getenv(s3CredentialsCatalogAccessKeyId); + String cas = System.getenv(s3CredentialsCatalogSecretAccessKey); + String cli = System.getenv(s3CredentialsClientAccessKeyId); + String cls = System.getenv(s3CredentialsClientSecretAccessKey); + this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = + (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey; + this.s3CredentialsClientAccessKeyId = (cli != null) ? cli : s3CredentialsClientAccessKeyId; + this.s3CredentialsClientSecretAccessKey = + (cls != null) ? cls : s3CredentialsClientSecretAccessKey; + } else { + this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; + this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; + this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; + } + } + + public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { + return this.credsVendingStrategy; + } + + public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { + return this.credsCatalogAndClientStrategy; + } + + public @NotNull String getS3Endpoint() { + return this.s3endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3pathStyleAccess; + } + + public @NotNull String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @NotNull String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3CredentialsClientAccessKeyId() { + return this.s3CredentialsClientAccessKeyId; + } + + public @Nullable String 
getS3CredentialsClientSecretAccessKey() { + return this.s3CredentialsClientSecretAccessKey; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType()) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md new file mode 100644 index 0000000000..08089f56f4 --- /dev/null +++ b/regtests/minio/Readme.md @@ -0,0 +1,42 @@ + + +# MiniIO Secured +## Minio and secured buckets with TLS self-signed / custom AC + +To be able to test Polaris with buckets in TLS under custom AC or self-signed certificate + +## MiniIO generate self-signed certificates designed for docker-compose setup + +- Download minio certificate generator : https://github.com/minio/certgen +- ```./certgen -host "localhost,minio,*"``` +- put them in ./certs and ./certs/CAs +- they will be mounted in default minio container placeholder + +## Test minIO secured TLS buckets from self-signed certificate with AWS CLI +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` +- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` + +## add to java cacerts only the public.crt as an AC +- ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` + +## remove from java cacerts the public.crt +- ```sudo keytool -delete -trustcacerts -cacerts -storepass changeit -noprompt -alias minio``` +- ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/certs/CAs/private.key b/regtests/minio/certs/CAs/private.key new file mode 100644 index 0000000000..e2e7ffca0c --- /dev/null +++ 
b/regtests/minio/certs/CAs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/CAs/public.crt b/regtests/minio/certs/CAs/public.crt new file mode 100644 index 0000000000..b06cc51e5d --- /dev/null +++ b/regtests/minio/certs/CAs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww +GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/certs/private.key b/regtests/minio/certs/private.key new file mode 100644 index 0000000000..e2e7ffca0c --- /dev/null +++ b/regtests/minio/certs/private.key @@ -0,0 +1,5 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw +pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR +cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m +-----END PRIVATE KEY----- diff --git a/regtests/minio/certs/public.crt b/regtests/minio/certs/public.crt new file mode 100644 index 0000000000..b06cc51e5d --- /dev/null +++ b/regtests/minio/certs/public.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww 
+GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k +ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT +Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG +ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v +NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud +DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/ +MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh +bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6 +BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa +saXcmTbe +-----END CERTIFICATE----- diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml new file mode 100644 index 0000000000..b61ca65370 --- /dev/null +++ b/regtests/minio/docker-compose.yml @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +services: + polaris-minio: + image: minio/minio:latest + container_name: minio + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + networks: + minio_net: + aliases: + - warehouse.minio + ports: + - 9001:9001 + - 9000:9000 + volumes: + - ./miniodata:/data + - ./certs:/root/.minio/certs/ + command: ["server", "/data", "--console-address", ":9001"] + minio-configured: + depends_on: + - polaris-minio + image: minio/mc:latest + container_name: mc + networks: + minio_net: + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + volumes: + - ./certs:/root/.mc/certs + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc rm -r --force --quiet minio/warehouse; + /usr/bin/mc mb --ignore-existing minio/warehouse; + /usr/bin/mc policy set readwrite minio/warehouse; + /usr/bin/mc rm -r --force --quiet minio/warehouse2; + /usr/bin/mc mb --ignore-existing minio/warehouse2; + /usr/bin/mc policy set readwrite minio/warehouse2; + /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; + /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; + tail -f /dev/null + " +networks: + minio_net: + diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql new file mode 100644 index 0000000000..966ea6db62 --- /dev/null +++ b/regtests/minio/queries-for-spark.sql @@ -0,0 +1,42 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at + +-- http://www.apache.org/licenses/LICENSE-2.0 + +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE DATABASE IF NOT EXISTS db1; +CREATE DATABASE IF NOT EXISTS db1.ns1; +CREATE DATABASE IF NOT EXISTS db1.ns2; +CREATE OR REPLACE TABLE db1.ns1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (10, 20); +INSERT INTO db1.ns1.table1 VALUES (11, 21); +INSERT INTO db1.ns1.table1 VALUES (12, 22); +SELECT * FROM db1.ns1.table1; + +CREATE OR REPLACE VIEW db1.ns2.view1 ( line_count COMMENT 'Count of lines') AS SELECT COUNT(1) as qty FROM db1.ns1.table1; +SELECT * FROM db1.ns2.view1; +INSERT INTO db1.ns1.table1 VALUES (13, 23); +SELECT * FROM db1.ns2.view1; + +CREATE DATABASE IF NOT EXISTS db1; +CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); +INSERT INTO db1.ns1.table1 VALUES (3, 2); + +-- Test the second bucket allowed in the catalog +CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; +CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); +INSERT INTO db2.table1 VALUES (01, 02); +SELECT * FROM db2.table1; + +quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh new file mode 100755 index 0000000000..fc16fa5422 --- /dev/null +++ b/regtests/run_spark_sql_s3compatible.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# ----------------------------------------------------------------------------- +# Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. +# ----------------------------------------------------------------------------- +# +# Prequisite: +# This script use a MinIO with TLS. +# Please follow instructions in regtests/minio/Readme.md and update your +# java cacerts with self-signed certificate +# +# Usage: +# ./run_spark_sql_s3compatible.sh [S3-location] +# +# Description: +# - Without arguments: Runs against default minio bucket s3://warehouse/polaris +# - With one arguments: Runs against a catalog backed by minio S3. +# - [S3-location] - The S3 path to use as the default base location for the catalog. 
+# +# Examples: +# - Run against S3_COMPATIBLE storage: +# ./run_spark_sql_s3compatible.sh s3://warehouse/polaris + + +clear +if [ $# -ne 0 ] && [ $# -ne 1 ]; then + echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the bucket, by default it will be s3://warehouse/polaris" + echo "Usage: ./run_spark_sql_s3compatible.sh [S3-location]" + exit 1 +fi + +# Init +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}" +REGTEST_HOME=$(dirname $(realpath $0)) +cd ${REGTEST_HOME} + + +if [ $# -eq 0 ]; then + echo "creating a catalog backed by S3, default bucket is s3://warehouse/polaris" + S3_LOCATION="s3://warehouse/polaris" +fi + +if [ $# -eq 1 ]; then + echo "creating a catalog backed by S3 from first arg of this script respecting pattern 's3://mybucket/path'" + S3_LOCATION=$1 +fi +# Second location for testing catalog update +S3_LOCATION_2="s3://warehouse2/polaris/" + + + +# check if Polaris is running (any answer other than HTTP 200 means it is unreachable or unhealthy) +polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs --output /dev/null) +if [ $polaris_http_code -ne 200 ]; then + echo "Polaris is not running on ${POLARIS_HOST:-localhost}:8181. End of script" + exit 1 +fi + +# check if cacerts contain MinIO certificate +cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry) +echo $cert_response +if [ -z "$cert_response" ]; then + echo "There is no MinIO certificate in your cacerts, please read regtests/minio/Readme.md" + echo "End of script :-(" + exit 1 +fi + +# start minio with buckets and users +echo -e "\n\n-------\n\n" +echo "Start a minio with secured self-signed buckets s3://warehouse and users, wait a moment please..."
+docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured + +echo "minio browser is available during this test at https://localhost:9001 admin/password (please accept the self signed certificate)" +echo -e "\n\n-------\n\n" + +# spark setup +export SPARK_VERSION=spark-3.5.2 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 + +echo "Doing spark setup... wait a moment" +./setup.sh > /dev/null 2>&1 + +if [ -z "${SPARK_HOME}" ]; then + export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi + + + + +# start of tests + +# creation of catalog + + +# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environment variables have to be available to Polaris +# CATALOG_ID=minio-user-catalog +# CATALOG_SECRET=12345678-minio-catalog +# CLIENT_ID=minio-user-client +# CLIENT_SECRET=12345678-minio-client + +echo -e "\n----\nCREATE Catalog\n" +response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d "{ + \"name\": \"manual_spark\", + \"id\": 100, + \"type\": \"INTERNAL\", + \"readOnly\": false, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" +)
+echo -e "Catalog creation - response API http code : $response_catalog \n" +if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then + echo "Problem during catalog creation" + exit 1 +fi + + + + +echo -e "Get the catalog created : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + +# Try to update the catalog, - adding a second bucket in the allowed locations +echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the allowed locations\n" +curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark \ + -d "{ + \"currentEntityVersion\":1, + \"properties\": { + \"default-base-location\": \"${S3_LOCATION}\" + }, + \"storageConfigInfo\": { + \"storageType\": \"S3_COMPATIBLE\", + \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", + \"credsCatalogAndClientStrategy\": \"VALUE\", + \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], + \"s3.path-style-access\": true, + \"s3.endpoint\": \"https://localhost:9000\", + \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", + \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", + \"s3.credentials.client.access-key-id\": \"minio-user-client\", + \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + } + }" + + +echo -e "Get the catalog updated with second allowed location : \n" +curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark + + +echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it
can only manage access and metadata\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + + +echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ + -d '{"name": "catalog_admin"}' > /dev/stderr + + +echo -e "\n----\nStart Spark-sql to test Polaris catalog with queries\n" +${SPARK_HOME}/bin/spark-sql --verbose \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ + --conf spark.sql.catalog.polaris.warehouse=manual_spark \ + --conf spark.sql.defaultCatalog=polaris \ + --conf spark.hadoop.hive.cli.print.header=true \ + -f "minio/queries-for-spark.sql" + + +echo -e "\n\n\nEnd of tests, table and view data should be displayed in the log above" +echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" +echo ":-)" +echo "" +docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down + diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index f61c67620f..80b0729d24 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++
b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,6 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -73,6 +74,9 @@ public PolarisStorageIntegrationProviderImpl( (PolarisStorageIntegration) new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; + case S3_COMPATIBLE: + storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + break; case GCS: storageIntegration = (PolarisStorageIntegration) diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 54c3b96759..d4a1f44fb8 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -862,6 +862,7 @@ components: type: string enum: - S3 + - S3_COMPATIBLE - GCS - AZURE - FILE @@ -877,6 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -905,6 +907,58 @@ components: required: - roleArn + S3StorageConfigInfo: + type: object + description: S3 compatible storage configuration info (MinIO, Dell ECS, Netapp StorageGRID, ...) + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + properties: + credsCatalogAndClientStrategy: + type: string + enum: + - VALUE + - ENV_VAR_NAME + default: ENV_VAR_NAME + example: "ACCESS_KEY" + description: When you send key VALUE directly via this command, they should apear in logs. 
By ENV_VAR_NAME without dollar, only a reference will appear in logs, but the value has to be available as an environment variable in the context where Polaris is running + credsVendingStrategy: + type: string + enum: + - TOKEN_WITH_ASSUME_ROLE + - KEYS_SAME_AS_CATALOG + - KEYS_DEDICATED_TO_CLIENT + default: TOKEN_WITH_ASSUME_ROLE + description: The catalog strategy to vend credentials to the client. Possible options are the same keys as the catalog, keys dedicated to clients, or tokens obtained via the STS 'assumeRole' method (Dell ECS, NetApp StorageGRID, MinIO) + s3.path-style-access: + type: boolean + description: if true, use path-style access + default: false + s3.endpoint: + type: string + description: the S3 endpoint + example: "http[s]://host:port" + s3.credentials.catalog.access-key-id: + type: string + description: The ACCESS_KEY_ID used by the catalog to communicate with S3 + example: "$AWS_ACCESS_KEY_ID" + s3.credentials.catalog.secret-access-key: + type: string + description: The SECRET_ACCESS_KEY used by the catalog to communicate with S3 + example: "$AWS_SECRET_ACCESS_KEY" + s3.credentials.client.access-key-id: + type: string + description: Optional - ACCESS_KEY_ID vended by the catalog to the client in case this credsVendingStrategy is selected + example: "$AWS_ACCESS_KEY_ID" + s3.credentials.client.secret-access-key: + type: string + description: Optional - SECRET_ACCESS_KEY vended by the catalog to the client in case this credsVendingStrategy is selected + example: "$AWS_SECRET_ACCESS_KEY" + required: + - credsVendingStrategy + - s3.endpoint + - s3.credentials.catalog.access-key-id + - s3.credentials.catalog.secret-access-key + AzureStorageConfigInfo: + type: object + description: azure storage configuration info From a737b23b5bd6faa02cfe790d671cdd9c3888f91e Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 21 Oct 2024 22:30:38 +0200 Subject: [PATCH 07/17] Create Readme.md for s3-compatible Better descriptions typo & comments Refactoring with
skipCredentialSubscopingIndirection -> finaly removed Rebase with AWS updates from main branch adding roleArn, camelCase refactoring, typo, cleaning Add default AWS credentials provider for STS Error Co-authored-by: Gerrit-K Rebase from quarkus and keep only sts with some suggestions from code review helm unit test --- helm/polaris/tests/configmap_test.yaml | 286 ++++++++---------- .../polaris/core/entity/CatalogEntity.java | 52 ++-- .../storage/PolarisCredentialProperty.java | 3 +- .../PolarisStorageConfigurationInfo.java | 4 +- .../s3/S3CredentialsStorageIntegration.java | 138 --------- .../s3/S3StorageConfigurationInfo.java | 164 ---------- ...mpatibleCredentialsStorageIntegration.java | 220 ++++++++++++++ .../S3CompatibleStorageConfigurationInfo.java | 113 +++++++ .../main/resources/application-it.properties | 2 +- .../src/main/resources/application.properties | 2 +- regtests/minio/Readme.md | 11 +- regtests/minio/docker-compose.yml | 4 - regtests/minio/miniodata/Readme.md | 1 + regtests/run_spark_sql_s3compatible.sh | 70 +++-- ...PolarisStorageIntegrationProviderImpl.java | 6 +- spec/polaris-management-service.yml | 57 ++-- 16 files changed, 553 insertions(+), 580 deletions(-) delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java delete mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java create mode 100644 regtests/minio/miniodata/Readme.md diff --git a/helm/polaris/tests/configmap_test.yaml b/helm/polaris/tests/configmap_test.yaml index ef725ec4f3..e070bf0dcf 100644 --- a/helm/polaris/tests/configmap_test.yaml +++ b/helm/polaris/tests/configmap_test.yaml @@ -183,159 +183,141 @@ 
tests: set: logging: { file: { enabled: true, json: true }, console: { enabled: true, json: true } } asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.enable=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.json=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.json=true" } - - - it: should include logging categories - set: - logging: - categories: - # compact style - org.acme: DEBUG - # expanded style - org: - acme: - service: INFO - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme\".level=DEBUG" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme.service\".level=INFO" } - - - it: should include MDC context - set: - logging: - mdc: - # compact style - org.acme: foo - # expanded style - org: - acme: - service: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme\"=foo" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme.service\"=foo" } - - - it: should include telemetry configuration - set: - tracing: { enabled: true, endpoint: http://custom:4317, attributes: { service.name: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.exporter.otlp.endpoint=http://custom:4317" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=service.name=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=foo=bar" } - - - it: should include set sample rate numeric - set: - tracing: { enabled: true, sample: "0.123" } - asserts: - - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_traceidratio" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler.arg=0.123" } - - - it: should include set sample rate "all" - set: - tracing: { enabled: true, sample: "all" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_always_on" } - - - it: should include set sample rate "none" - set: - tracing: { enabled: true, sample: "none" } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=always_off" } - - - it: should disable tracing by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should disable tracing - set: - tracing: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } - - - it: should include custom metrics - set: - metrics: { enabled: true, tags: { app: custom, foo: bar } } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.app=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.foo=bar" } - - - it: should disable metrics - set: - metrics: { enabled: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.micrometer.enabled=false" } - - - it: should include advanced configuration - set: - advancedConfig: - # compact style - quarkus.compact.custom: true - # expanded style - quarkus: - expanded: - custom: foo - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.compact.custom=true" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.expanded.custom=foo" } - - - it: should not include CORS configuration by default - asserts: - - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.http.cors" } - not: true - - - it: should include CORS configuration if defined - set: - cors: { allowedOrigins: [ "http://localhost:3000", "https://localhost:4000" ], allowedMethods: [ "GET", "POST" ], allowedHeaders: [ "X-Custom1", "X-Custom2" ], exposedHeaders: [ "X-Exposed-Custom1", "X-Exposed-Custom2" ], accessControlMaxAge: "PT1H", accessControlAllowCredentials: false } - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.origins=http://localhost:3000,https://localhost:4000" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.methods=GET,POST" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.headers=X-Custom1,X-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.exposed-headers=X-Exposed-Custom1,X-Exposed-Custom2" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-max-age=PT1H" } - - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-allow-credentials=false" } - - - it: should configure rate-limiter with default values - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter no-op - set: - rateLimiter.type: no-op - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } - - - it: should configure rate-limiter with default token bucket values - set: - rateLimiter.type: default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=default" } - - matchRegex: { path: 'data["application.properties"]', pattern: 
"polaris.rate-limiter.token-bucket.requests-per-second=9999" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT10S" } - - - it: should configure rate-limiter with custom token bucket values - set: - rateLimiter: - type: custom - tokenBucket: - type: custom - requestsPerSecond: 1234 - window: PT5S - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=custom" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=1234" } - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT5S" } - - - it: should not include tasks configuration by default - asserts: - - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks" } - not: true - - - it: should include tasks configuration if defined + - equal: + path: data + value: + polaris-server.yml: |- + authenticator: + class: org.apache.polaris.service.auth.TestInlineBearerTokenPolarisAuthenticator + callContextResolver: + type: default + cors: + allowed-credentials: true + allowed-headers: + - '*' + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + exposed-headers: + - '*' + preflight-max-age: 600 + defaultRealms: + - default-realm + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + - S3_COMPATIBLE + - GCS + - AZURE + - FILE + io: + factoryType: default + logging: + appenders: + - logFormat: '%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] + %c{30}: %m %kvp%n%ex' + threshold: ALL + type: console + level: INFO + loggers: + 
org.apache.iceberg.rest: DEBUG + org.apache.polaris: DEBUG + maxRequestBodyBytes: -1 + metaStoreManager: + type: in-memory + oauth2: + type: test + rateLimiter: + type: no-op + realmContextResolver: + type: default + server: + adminConnectors: + - port: 8182 + type: http + applicationConnectors: + - port: 8181 + type: http + maxThreads: 200 + minThreads: 10 + requestLog: + appenders: + - type: console + - it: should set config map data (auto sorted) set: - tasks: { maxConcurrentTasks: 10, maxQueuedTasks: 20 } + polarisServerConfig: + server: + maxThreads: 200 + minThreads: 10 + applicationConnectors: + - type: http + port: 8181 + adminConnectors: + - type: http + port: 8182 + requestLog: + appenders: + - type: console + featureConfiguration: + ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + SUPPORTED_CATALOG_STORAGE_TYPES: + - S3 + callContextResolver: + type: default + realmContextResolver: + type: default + defaultRealms: + - default-realm + metaStoreManager: + type: eclipse-link + persistence-unit: polaris + conf-file: /eclipselink-config/conf.jar!/persistence.xml + io: + factoryType: default + oauth2: + type: default + tokenBroker: + type: symmetric-key + secret: polaris + authenticator: + class: org.apache.polaris.service.auth.DefaultPolarisAuthenticator + cors: + allowed-origins: + - http://localhost:8080 + allowed-timing-origins: + - http://localhost:8080 + allowed-methods: + - PATCH + - POST + - DELETE + - GET + - PUT + allowed-headers: + - "*" + exposed-headers: + - "*" + preflight-max-age: 600 + allowed-credentials: true + logging: + level: INFO + loggers: + org.apache.iceberg.rest: INFO + org.apache.polaris: INFO + appenders: + - type: console + threshold: ALL + logFormat: "%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] %c{30}: %m %kvp%n%ex" + maxRequestBodyBytes: -1 + rateLimiter: + type: no-op asserts: - matchRegex: { path: 'data["application.properties"]', pattern: 
"polaris.tasks.max-concurrent-tasks=10" } - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-queued-tasks=20" } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index f8a37dd6f7..ab70b9b497 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -38,14 +38,14 @@ import org.apache.polaris.core.admin.model.FileStorageConfigInfo; import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; import org.apache.polaris.core.admin.model.PolarisCatalog; -import org.apache.polaris.core.admin.model.S3StorageConfigInfo; +import org.apache.polaris.core.admin.model.S3CompatibleStorageConfigInfo; import org.apache.polaris.core.admin.model.StorageConfigInfo; import org.apache.polaris.core.storage.FileStorageConfigurationInfo; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; /** * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link @@ -143,30 +143,19 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) .setRegion(awsConfig.getRegion()) .build(); } - if (configInfo instanceof S3StorageConfigurationInfo) { - S3StorageConfigurationInfo s3Config = (S3StorageConfigurationInfo) configInfo; - return S3StorageConfigInfo.builder() + if (configInfo instanceof S3CompatibleStorageConfigurationInfo) { + S3CompatibleStorageConfigurationInfo s3Config = + 
(S3CompatibleStorageConfigurationInfo) configInfo; + return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) - .setCredsVendingStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo.CredsVendingStrategyEnum - .valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsVendingStrategyEnum.class, - s3Config.getCredsVendingStrategy().name())) - .setCredsCatalogAndClientStrategy( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.admin.model.S3StorageConfigInfo - .CredsCatalogAndClientStrategyEnum.class, - s3Config.getCredsCatalogAndClientStrategy().name())) .setAllowedLocations(s3Config.getAllowedLocations()) - .setS3CredentialsCatalogAccessKeyId(s3Config.getS3CredentialsCatalogAccessKeyId()) - .setS3CredentialsCatalogSecretAccessKey( + .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) + .setS3CredentialsCatalogSecretAccessKeyEnvVar( s3Config.getS3CredentialsCatalogSecretAccessKey()) - .setS3CredentialsClientAccessKeyId(s3Config.getS3CredentialsClientSecretAccessKey()) - .setS3CredentialsClientSecretAccessKey(s3Config.getS3CredentialsClientAccessKeyId()) + .setS3Region(s3Config.getS3Region()) + .setS3RoleArn(s3Config.getS3RoleArn()) .build(); } if (configInfo instanceof AzureStorageConfigurationInfo) { @@ -280,24 +269,17 @@ public Builder setStorageConfigurationInfo( break; case S3_COMPATIBLE: - S3StorageConfigInfo s3ConfigModel = (S3StorageConfigInfo) storageConfigModel; + S3CompatibleStorageConfigInfo s3ConfigModel = + (S3CompatibleStorageConfigInfo) storageConfigModel; config = - new S3StorageConfigurationInfo( + new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, - S3StorageConfigInfo.CredsVendingStrategyEnum.valueOf( - 
org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsVendingStrategyEnum.class, - s3ConfigModel.getCredsVendingStrategy().name()), - S3StorageConfigInfo.CredsCatalogAndClientStrategyEnum.valueOf( - org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo - .CredsCatalogAndClientStrategyEnum.class, - s3ConfigModel.getCredsCatalogAndClientStrategy().name()), s3ConfigModel.getS3Endpoint(), - s3ConfigModel.getS3CredentialsCatalogAccessKeyId(), - s3ConfigModel.getS3CredentialsCatalogSecretAccessKey(), - s3ConfigModel.getS3CredentialsClientAccessKeyId(), - s3ConfigModel.getS3CredentialsClientSecretAccessKey(), + s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), + s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), + s3ConfigModel.getS3Region(), + s3ConfigModel.getS3RoleArn(), new ArrayList<>(allowedLocations)); break; case AZURE: diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java index 13838e6af9..b7f1a9808c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java @@ -24,7 +24,8 @@ public enum PolarisCredentialProperty { AWS_SECRET_KEY(String.class, "s3.secret-access-key", "the aws access key secret"), AWS_TOKEN(String.class, "s3.session-token", "the aws scoped access token"), AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"), - AWS_PATH_STYLE_ACCESS(Boolean.class, "s3.path-style-access", "the aws s3 path style access"), + AWS_PATH_STYLE_ACCESS( + Boolean.class, "s3.path-style-access", "whether or not to use path-style access"), CLIENT_REGION( String.class, "client.region", "region to configure client for making requests to AWS"), diff --git 
a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java index 4f290e77ba..c6eac4f7e4 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java @@ -47,7 +47,7 @@ import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo; import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo; import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo; -import org.apache.polaris.core.storage.s3.S3StorageConfigurationInfo; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +63,7 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME) @JsonSubTypes({ @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class), - @JsonSubTypes.Type(value = S3StorageConfigurationInfo.class), + @JsonSubTypes.Type(value = S3CompatibleStorageConfigurationInfo.class), @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class), @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class), @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java deleted file mode 100644 index 5fdbbdf37d..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3CredentialsStorageIntegration.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import java.net.URI; -import java.util.EnumMap; -import java.util.Set; -import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.storage.InMemoryStorageIntegration; -import org.apache.polaris.core.storage.PolarisCredentialProperty; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.sts.StsClient; -import software.amazon.awssdk.services.sts.StsClientBuilder; -import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; -import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; - -/** Credential vendor that supports generating */ -public class S3CredentialsStorageIntegration - extends InMemoryStorageIntegration { - - private static final Logger LOGGER = - LoggerFactory.getLogger(S3CredentialsStorageIntegration.class); - - private StsClient stsClient; - - // Constructor - public S3CredentialsStorageIntegration() { - super(S3CredentialsStorageIntegration.class.getName()); - } - - public void createStsClient(S3StorageConfigurationInfo s3storageConfig) { - - LOGGER.debug("S3Compatible - createStsClient()"); - - 
LOGGER.info( - "S3Compatible - AWS STS endpoint is unique and different from the S3 Endpoint. AWS SDK need to be overided with dedicated Endpoint from S3Compatible, otherwise the AWS STS url is targeted"); - - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - - stsBuilder.region( - Region - .US_WEST_1); // default region to avoid bug, because most (all?) S3 compatible softwares - // do not care about regions - stsBuilder.endpointOverride(URI.create(s3storageConfig.getS3Endpoint())); - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create( - AwsBasicCredentials.create( - s3storageConfig.getS3CredentialsCatalogAccessKeyId(), - s3storageConfig.getS3CredentialsCatalogSecretAccessKey()))); - - this.stsClient = stsBuilder.build(); - LOGGER.debug("S3Compatible - stsClient successfully built"); - } - - /** {@inheritDoc} */ - @Override - public EnumMap getSubscopedCreds( - @NotNull PolarisDiagnostics diagnostics, - @NotNull S3StorageConfigurationInfo storageConfig, - boolean allowListOperation, - @NotNull Set allowedReadLocations, - @NotNull Set allowedWriteLocations) { - - LOGGER.debug("S3Compatible - getSubscopedCreds - applying credential strategy"); - - EnumMap propertiesMap = - new EnumMap<>(PolarisCredentialProperty.class); - propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); - propertiesMap.put( - PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, - storageConfig.getS3PathStyleAccess().toString()); - - switch (storageConfig.getCredsVendingStrategy()) { - case KEYS_SAME_AS_CATALOG: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsCatalogAccessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsCatalogSecretAccessKey()); - break; - - case KEYS_DEDICATED_TO_CLIENT: - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, - storageConfig.getS3CredentialsClientAccessKeyId()); - 
propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, - storageConfig.getS3CredentialsClientSecretAccessKey()); - break; - - case TOKEN_WITH_ASSUME_ROLE: - if (this.stsClient == null) { - createStsClient(storageConfig); - } - LOGGER.debug("S3Compatible - assumeRole !"); - AssumeRoleResponse response = - stsClient.assumeRole( - AssumeRoleRequest.builder().roleSessionName("PolarisCredentialsSTS").build()); - - propertiesMap.put( - PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); - propertiesMap.put( - PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); - break; - - // @TODO implement the MinIO external OpenID Connect - - // https://min.io/docs/minio/linux/developers/security-token-service.html?ref=docs-redirect#id1 - // case TOKEN_WITH_ASSUME_ROLE_WITH_WEB_IDENTITY: - // break; - } - - return propertiesMap; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java deleted file mode 100644 index c66deeff7d..0000000000 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3/S3StorageConfigurationInfo.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.polaris.core.storage.s3; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.MoreObjects; -import java.util.List; -import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; - -/** Polaris Storage Configuration information for an S3 Compatible solution, MinIO, Dell ECS... */ -public class S3StorageConfigurationInfo extends PolarisStorageConfigurationInfo { - - // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. 
- @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; - private @NotNull CredsVendingStrategyEnum credsVendingStrategy; - private @NotNull CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy; - private @NotNull String s3endpoint; - private @NotNull Boolean s3pathStyleAccess; - private @NotNull String s3CredentialsCatalogAccessKeyId; - private @NotNull String s3CredentialsCatalogSecretAccessKey; - private @Nullable String s3CredentialsClientAccessKeyId; - private @Nullable String s3CredentialsClientSecretAccessKey; - - // Define how and what the catalog client will receive as credentials - public static enum CredsVendingStrategyEnum { - KEYS_SAME_AS_CATALOG, - KEYS_DEDICATED_TO_CLIENT, - TOKEN_WITH_ASSUME_ROLE; - }; - - // Define how the access and secret keys will be receive during the catalo creation, if - // ENV_VAR_NAME, the variable must exist in the Polaris running environement - it is more secured, - // but less dynamic - public static enum CredsCatalogAndClientStrategyEnum { - VALUE, - ENV_VAR_NAME; - }; - - // Constructor - @JsonCreator - public S3StorageConfigurationInfo( - @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, - @JsonProperty(value = "credsVendingStrategy", required = true) @NotNull - CredsVendingStrategyEnum credsVendingStrategy, - @JsonProperty(value = "credsCatalogAndClientStrategy", required = true) @NotNull - CredsCatalogAndClientStrategyEnum credsCatalogAndClientStrategy, - @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @NotNull - String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @NotNull - String s3CredentialsCatalogSecretAccessKey, - @JsonProperty(value = "s3CredentialsClientAccessKeyId", required = false) @Nullable - String s3CredentialsClientAccessKeyId, - @JsonProperty(value = 
"s3CredentialsClientSecretAccessKey", required = false) @Nullable - String s3CredentialsClientSecretAccessKey, - @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull - Boolean s3PathStyleAccess, - @JsonProperty(value = "allowedLocations", required = true) @NotNull - List allowedLocations) { - - // Classic super and constructor stuff storing data in private internal properties - super(storageType, allowedLocations); - validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); - this.credsVendingStrategy = - CredsVendingStrategyEnum.valueOf( - CredsVendingStrategyEnum.class, credsVendingStrategy.name()); - this.credsCatalogAndClientStrategy = - CredsCatalogAndClientStrategyEnum.valueOf( - CredsCatalogAndClientStrategyEnum.class, credsCatalogAndClientStrategy.name()); - this.s3pathStyleAccess = s3PathStyleAccess; - this.s3endpoint = s3Endpoint; - - // The constructor is called multiple time during catalog life - // to do substitution only once, there is a basic if null test, otherwise affect the data from - // the "Polaris cache storage" - // this way the first time the value is retrived from the name of the variable - // next time the getenv will try to retrive a variable but is using the value as a nome, it will - // be null, we affect the value provided by "Polaris cache storage" - if (CredsCatalogAndClientStrategyEnum.ENV_VAR_NAME.equals(credsCatalogAndClientStrategy)) { - String cai = System.getenv(s3CredentialsCatalogAccessKeyId); - String cas = System.getenv(s3CredentialsCatalogSecretAccessKey); - String cli = System.getenv(s3CredentialsClientAccessKeyId); - String cls = System.getenv(s3CredentialsClientSecretAccessKey); - this.s3CredentialsCatalogAccessKeyId = (cai != null) ? cai : s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = - (cas != null) ? cas : s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = (cli != null) ? 
cli : s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = - (cls != null) ? cls : s3CredentialsClientSecretAccessKey; - } else { - this.s3CredentialsCatalogAccessKeyId = s3CredentialsCatalogAccessKeyId; - this.s3CredentialsCatalogSecretAccessKey = s3CredentialsCatalogSecretAccessKey; - this.s3CredentialsClientAccessKeyId = s3CredentialsClientAccessKeyId; - this.s3CredentialsClientSecretAccessKey = s3CredentialsClientSecretAccessKey; - } - } - - public @NotNull CredsVendingStrategyEnum getCredsVendingStrategy() { - return this.credsVendingStrategy; - } - - public @NotNull CredsCatalogAndClientStrategyEnum getCredsCatalogAndClientStrategy() { - return this.credsCatalogAndClientStrategy; - } - - public @NotNull String getS3Endpoint() { - return this.s3endpoint; - } - - public @NotNull Boolean getS3PathStyleAccess() { - return this.s3pathStyleAccess; - } - - public @NotNull String getS3CredentialsCatalogAccessKeyId() { - return this.s3CredentialsCatalogAccessKeyId; - } - - public @NotNull String getS3CredentialsCatalogSecretAccessKey() { - return this.s3CredentialsCatalogSecretAccessKey; - } - - public @Nullable String getS3CredentialsClientAccessKeyId() { - return this.s3CredentialsClientAccessKeyId; - } - - public @Nullable String getS3CredentialsClientSecretAccessKey() { - return this.s3CredentialsClientSecretAccessKey; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("storageType", getStorageType()) - .add("storageType", getStorageType().name()) - .add("allowedLocation", getAllowedLocations()) - .toString(); - } - - @Override - public String getFileIoImplClassName() { - return "org.apache.iceberg.aws.s3.S3FileIO"; - } -} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java new file mode 100644 index 
0000000000..3dfb03814d --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; + +import jakarta.annotation.Nonnull; +import java.net.URI; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.apache.polaris.core.PolarisConfigurationStore; +import org.apache.polaris.core.PolarisDiagnostics; +import org.apache.polaris.core.context.RealmContext; +import org.apache.polaris.core.storage.InMemoryStorageIntegration; +import org.apache.polaris.core.storage.PolarisCredentialProperty; +import org.apache.polaris.core.storage.StorageUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; +import 
software.amazon.awssdk.policybuilder.iam.IamEffect; +import software.amazon.awssdk.policybuilder.iam.IamPolicy; +import software.amazon.awssdk.policybuilder.iam.IamResource; +import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +/** S3 compatible implementation of PolarisStorageIntegration */ +public class S3CompatibleCredentialsStorageIntegration + extends InMemoryStorageIntegration { + + private static final Logger LOGGER = + LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); + private final PolarisConfigurationStore configurationStore; + + public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { + super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); + this.configurationStore = configurationStore; + } + + /** {@inheritDoc} */ + @Override + public EnumMap getSubscopedCreds( + @Nonnull RealmContext realmContext, + @Nonnull PolarisDiagnostics diagnostics, + @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + boolean allowListOperation, + @Nonnull Set allowedReadLocations, + @Nonnull Set allowedWriteLocations) { + + StsClient stsClient; + String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); + String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); + + EnumMap propertiesMap = + new EnumMap<>(PolarisCredentialProperty.class); + propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint()); + propertiesMap.put( + PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS, + storageConfig.getS3PathStyleAccess().toString()); + if (storageConfig.getS3Region() != null) { + propertiesMap.put(PolarisCredentialProperty.CLIENT_REGION, 
storageConfig.getS3Region()); + } + + LOGGER.debug("S3Compatible - createStsClient()"); + try { + StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (caI != null && caS != null) { + // else default provider build credentials from profile or standard AWS env var + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug( + "S3Compatible - stsClient using keys from catalog settings - overiding default constructor"); + } + stsClient = stsBuilder.build(); + LOGGER.debug("S3Compatible - stsClient successfully built"); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder() + .roleSessionName("PolarisCredentialsSTS") + .roleArn( + (storageConfig.getS3RoleArn() == null) ? "" : storageConfig.getS3RoleArn()) + .policy( + policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + .toJson()) + .durationSeconds( + configurationStore.getConfiguration( + realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put( + PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + LOGGER.debug( + "S3Compatible - assumeRole - Token Expiration at : {}", + response.credentials().expiration().toString()); + + } catch (Exception e) { + System.err.println("S3Compatible - stsClient - build failure : " + e.getMessage()); + } + + return propertiesMap; + } + + /* + * function from AwsCredentialsStorageIntegration but without roleArn parameter + */ + private IamPolicy policyString( + boolean allowList, Set readLocations, Set writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder 
allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map bucketListStatementBuilder = new HashMap<>(); + Map bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = "arn:aws:s3:::"; + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + 
IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = StorageUtil.getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", bucket, path); + } + + /* function from AwsCredentialsStorageIntegration */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java new file mode 100644 index 0000000000..776279546a --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.storage.s3compatible; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import java.util.List; +import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +/** + * S3-Compatible Storage Configuration. This class holds the parameters needed to connect to + * S3-compatible storage services such as MinIO, Ceph, Dell ECS, etc. + */ +public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { + + // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required + // for allowed read and write locations for subscoping creds. + @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; + private final @NotNull String s3Endpoint; + private final @Nullable String s3CredentialsCatalogAccessKeyId; + private final @Nullable String s3CredentialsCatalogSecretAccessKey; + private final @NotNull Boolean s3PathStyleAccess; + private final @Nullable String s3Region; + private final @Nullable String s3RoleArn; + + @JsonCreator + public S3CompatibleStorageConfigurationInfo( + @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, + @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + String s3CredentialsCatalogAccessKeyId, + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + String s3CredentialsCatalogSecretAccessKey, + @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + Boolean s3PathStyleAccess, + 
@JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, + @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, + @JsonProperty(value = "allowedLocations", required = true) @Nullable + List allowedLocations) { + + super(StorageType.S3_COMPATIBLE, allowedLocations); + validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); + this.s3PathStyleAccess = s3PathStyleAccess; + this.s3Endpoint = s3Endpoint; + this.s3CredentialsCatalogAccessKeyId = + (s3CredentialsCatalogAccessKeyId == null) ? "" : s3CredentialsCatalogAccessKeyId; + this.s3CredentialsCatalogSecretAccessKey = + (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey; + this.s3Region = s3Region; + this.s3RoleArn = s3RoleArn; + } + + public @NotNull String getS3Endpoint() { + return this.s3Endpoint; + } + + public @NotNull Boolean getS3PathStyleAccess() { + return this.s3PathStyleAccess; + } + + public @Nullable String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @Nullable String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3RoleArn() { + return this.s3RoleArn; + } + + public @Nullable String getS3Region() { + return this.s3Region; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .add("s3Region", getS3Region()) + .add("s3RoleArn", getS3RoleArn()) + .add("s3PathStyleAccess", getS3PathStyleAccess()) + .add("s3Endpoint", getS3Endpoint()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/quarkus/defaults/src/main/resources/application-it.properties b/quarkus/defaults/src/main/resources/application-it.properties index 5f46d203f2..e4ad1a6e02 100644 --- 
a/quarkus/defaults/src/main/resources/application-it.properties +++ b/quarkus/defaults/src/main/resources/application-it.properties @@ -35,7 +35,7 @@ polaris.features.defaults."ALLOW_WILDCARD_LOCATION"=true polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=true polaris.features.defaults."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_it"=true polaris.features.defaults."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","GCS","AZURE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","S3_COMPATIBLE","GCS","AZURE"] polaris.realm-context.realms=POLARIS,OTHER diff --git a/quarkus/defaults/src/main/resources/application.properties b/quarkus/defaults/src/main/resources/application.properties index d3a2057372..9bc6cc03e7 100644 --- a/quarkus/defaults/src/main/resources/application.properties +++ b/quarkus/defaults/src/main/resources/application.properties @@ -90,7 +90,7 @@ polaris.realm-context.header-name=Polaris-Realm polaris.realm-context.require-header=false polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=false -polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","GCS","AZURE","FILE"] +polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","S3_COMPATIBLE","GCS","AZURE","FILE"] # realm overrides # polaris.features.realm-overrides."my-realm"."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_TEST"=true # polaris.features.realm-overrides."my-realm"."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md index 08089f56f4..afa54e0b2a 100644 --- a/regtests/minio/Readme.md +++ b/regtests/minio/Readme.md @@ -18,22 +18,21 @@ --> # MiniIO Secured -## Minio and secured buckets with TLS self-signed / custom AC +## Minio and secured buckets with TLS self-signed / custom Certificate Authority -To be able to test Polaris with buckets in TLS under custom AC or 
self-signed certificate +To be able to test Polaris with buckets in TLS under custom Certificate Authority or self-signed certificate ## MiniIO generate self-signed certificates designed for docker-compose setup - Download minio certificate generator : https://github.com/minio/certgen -- ```./certgen -host "localhost,minio,*"``` -- put them in ./certs and ./certs/CAs -- they will be mounted in default minio container placeholder +- Generate certificates: ```./certgen -host "localhost,minio,*"``` +- put them in ./certs and ./certs/CAs. They will be mounted in the default MinIO container placeholder. ## Test minIO secured TLS buckets from self-signed certificate with AWS CLI - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl``` - ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt``` -## add to java cacerts only the public.crt as an AC +## add to java cacerts only the public.crt as a Certificate Authority - ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt``` - ```keytool -list -cacerts -alias minio -storepass changeit``` diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml index b61ca65370..ff6a5c0a72 100644 --- a/regtests/minio/docker-compose.yml +++ b/regtests/minio/docker-compose.yml @@ -54,14 +54,10 @@ services: until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc rm -r --force --quiet minio/warehouse; /usr/bin/mc mb --ignore-existing minio/warehouse; - /usr/bin/mc policy set readwrite minio/warehouse; /usr/bin/mc rm -r --force --quiet minio/warehouse2; /usr/bin/mc mb --ignore-existing minio/warehouse2; - /usr/bin/mc policy set readwrite minio/warehouse2; /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; - /usr/bin/mc admin user add minio minio-user-client 12345678-minio-client; /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; - /usr/bin/mc admin policy attach minio readwrite --user minio-user-client; tail -f /dev/null " networks: diff --git a/regtests/minio/miniodata/Readme.md b/regtests/minio/miniodata/Readme.md new file mode 100644 index 0000000000..d65c6f4723 --- /dev/null +++ b/regtests/minio/miniodata/Readme.md @@ -0,0 +1 @@ +# Folder for MinIO data container volume diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index fc16fa5422..172488b7b2 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -47,7 +47,6 @@ if [ $# -ne 0 ] && [ $# -ne 1 ]; then fi # Init -SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}" REGTEST_HOME=$(dirname $(realpath $0)) cd ${REGTEST_HOME} @@ -65,6 +64,20 @@ fi S3_LOCATION_2="s3://warehouse2/polaris/" +# SPARK_BEARER_TOKEN +if ! 
output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi +SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') +if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then + echo "Error: Failed to retrieve bearer token" + exit 1 +fi # check if Polaris is running polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs --output /dev/null) @@ -73,6 +86,7 @@ if [ $polaris_http_code -eq 000 ] && [ $polaris_http_code -ne 200 ]; then exit 1 fi + # check if cacerts contain MinIO certificate cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry) echo $cert_response @@ -108,14 +122,15 @@ fi # creation of catalog +echo """ +These environment variables have to be available to Polaris service : +CATALOG_S3_KEY_ID = minio-user-catalog +CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +export CATALOG_S3_KEY_ID=minio-user-catalog +export CATALOG_S3_KEY_SECRET=12345678-minio-catalog +""" -# if "credsCatalogAndClientStrategy"=="ENV_VAR_NAME" and not "VALUE", then the following environnement variables have to be available to Polaris -# CATALOG_ID=minio-user-catalog -# CATALOG_SECRET=12345678-minio-catalog -# CLIENT_ID=minio-user-client -# CLIENT_SECRET=12345678-minio-client - -echo -e "\n----\nCREATE Catalog\n" +echo -e "\n----\nCREATE Catalog with few parameters \n" response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ @@ -130,18 +145,12 @@ response_catalog=$(curl --output /dev/null -w "%{http_code}" -s -i -X POST -H " }, 
\"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\"], - \"s3.path-style-access\": true, - \"s3.endpoint\": \"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.endpoint\": \"https://localhost:9000\" } }" ) + echo -e "Catalog creation - response API http code : $response_catalog \n" if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then echo "Problem during catalog creation" @@ -149,16 +158,14 @@ if [ $response_catalog -ne 201 ] && [ $response_catalog -ne 409 ]; then fi - - echo -e "Get the catalog created : \n" curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark -# Try to update the catalog, - adding a second bucket in the alllowed locations -echo -e "\n----\nUPDATE the catalog, - adding a second bucket in the alllowed locations\n" +# Update the catalog +echo -e "\n----\nUPDATE the catalog v1, - adding a second bucket in the alllowed locations\n" curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ @@ -170,26 +177,17 @@ curl -s -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ }, \"storageConfigInfo\": { \"storageType\": \"S3_COMPATIBLE\", - \"credsVendingStrategy\": \"TOKEN_WITH_ASSUME_ROLE\", - \"credsCatalogAndClientStrategy\": \"VALUE\", \"allowedLocations\": [\"${S3_LOCATION}/\",\"${S3_LOCATION_2}/\"], - \"s3.path-style-access\": true, \"s3.endpoint\": 
\"https://localhost:9000\", - \"s3.credentials.catalog.access-key-id\": \"minio-user-catalog\", - \"s3.credentials.catalog.secret-access-key\": \"12345678-minio-catalog\", - \"s3.credentials.client.access-key-id\": \"minio-user-client\", - \"s3.credentials.client.secret-access-key\": \"12345678-minio-client\" + \"s3.region\": \"region-1\", + \"s3.pathStyleAccess\": true, + \"s3.credentials.catalog.accessKeyEnvVar\": \"CATALOG_S3_KEY_ID\", + \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"CATALOG_S3_KEY_SECRET\", + \"s3.roleArn\": \"arn:xxx:xxx:xxx:xxxx\" } }" -echo -e "Get the catalog updated with second allowed location : \n" -curl -s -i -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \ - -H 'Accept: application/json' \ - -H 'Content-Type: application/json' \ - http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark - - echo -e "\n----\nAdd TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \ @@ -212,9 +210,9 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" + echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" echo ":-)" echo "" -docker-compose --progress quiet --project-name minio --project-directory minio/ -f minio/docker-compose.yml down - +docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java 
b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 80b0729d24..9c3aeedb1b 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -39,7 +39,7 @@ import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration; import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration; import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration; -import org.apache.polaris.core.storage.s3.S3CredentialsStorageIntegration; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleCredentialsStorageIntegration; import software.amazon.awssdk.services.sts.StsClient; @ApplicationScoped @@ -75,7 +75,9 @@ public PolarisStorageIntegrationProviderImpl( new AwsCredentialsStorageIntegration(stsClientSupplier.get()); break; case S3_COMPATIBLE: - storageIntegration = (PolarisStorageIntegration) new S3CredentialsStorageIntegration(); + storageIntegration = + (PolarisStorageIntegration) + new S3CompatibleCredentialsStorageIntegration(configurationStore); break; case GCS: storageIntegration = diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index d4a1f44fb8..370a62dc0d 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -878,7 +878,7 @@ components: propertyName: storageType mapping: S3: "#/components/schemas/AwsStorageConfigInfo" - S3_COMPATIBLE: "#/components/schemas/S3StorageConfigInfo" + S3_COMPATIBLE: "#/components/schemas/S3CompatibleStorageConfigInfo" AZURE: "#/components/schemas/AzureStorageConfigInfo" GCS: "#/components/schemas/GcpStorageConfigInfo" FILE: "#/components/schemas/FileStorageConfigInfo" @@ -907,57 +907,38 @@ components: required: - roleArn - S3StorageConfigInfo: + S3CompatibleStorageConfigInfo: type: 
object - description: S3 compatible storage configuration info (MinIO, Dell ECS, Netapp StorageGRID, ...) + description: s3-compatible storage configuration info (MinIO, Ceph, Dell ECS, Netapp StorageGRID, ...) allOf: - $ref: '#/components/schemas/StorageConfigInfo' properties: - credsCatalogAndClientStrategy: - type: string - enum: - - VALUE - - ENV_VAR_NAME - default: ENV_VAR_NAME - example: "ACCESS_KEY" - description: When you send key VALUE directly via this command, they should apear in logs. By ENV_VAR_NAME without dollar, only a reference will appear in logs, but the value have to be available as environnement variable in the context where Polaris is running - credsVendingStrategy: - type: string - enum: - - TOKEN_WITH_ASSUME_ROLE - - KEYS_SAME_AS_CATALOG - - KEYS_DEDICATED_TO_CLIENT - default: TOKEN_WITH_ASSUME_ROLE - description: The catalog strategy to vend credentials to client. Options possible are same keys than catalog, keys dedicated to clients, or Tokens with STS methods 'assumeRole' for Dell ECS or NetApp StorageGrid solution, 'truc' for MinIo solution) - s3.path-style-access: - type: boolean - description: if true use path style - default: false s3.endpoint: type: string description: the S3 endpoint example: "http[s]://host:port" - s3.credentials.catalog.access-key-id: + s3.credentials.catalog.accessKeyEnvVar: type: string - description: The ACCESS_KEY_ID used y the catalog to communicate with S3 - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.catalog.secret-access-key: + description: Default to AWS credentials, otherwise set the environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" + s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: The SECRET_ACCESS_KEY used y the catalog to communicate with S3 - example: "$AWS_SECRET_ACCESS_KEY" - s3.credentials.client.access-key-id: + description: Default to AWS credentials, 
otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" + s3.pathStyleAccess: + type: boolean + description: Whether or not to use path-style access + default: false + s3.region: type: string - description: Optional - ACCESS_KEY_ID vended by catalog to the client in case of this CredentialVendedStrategy is selected - example: "$AWS_ACCESS_KEY_ID" - s3.credentials.client.secret-access-key: + description: Optional - the s3 region where data is stored + example: "rack-1 or us-east-1" + s3.roleArn: type: string - description: Optional - SECRET_ACCESS_KEY vended by catalog to the client in case of this CredentialVendedStrategy is selected - example: "$AWS_SECRET_ACCESS_KEY" + description: Optional - a s3 role arn + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" required: - - credsVendingStrategy - s3.endpoint - - s3.credentials.catalog.access-key-id - - s3.credentials.catalog.secret-access-key AzureStorageConfigInfo: type: object From 819f7735f66b6fd0b18d94cc2977502c708a7a2a Mon Sep 17 00:00:00 2001 From: lefebsy Date: Thu, 27 Feb 2025 19:55:21 +0100 Subject: [PATCH 08/17] add s3 profile --- .../polaris/core/entity/CatalogEntity.java | 2 + .../polaris/core/storage/StorageUtil.java | 137 ++++++++++++++++ ...mpatibleCredentialsStorageIntegration.java | 148 ++++-------------- .../S3CompatibleStorageConfigurationInfo.java | 19 ++- regtests/minio/queries-for-spark.sql | 12 +- regtests/run_spark_sql_s3compatible.sh | 140 +++++++++-------- spec/polaris-management-service.yml | 19 ++- 7 files changed, 271 insertions(+), 206 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index ab70b9b497..d5ad54771f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ 
b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -149,6 +149,7 @@ private StorageConfigInfo getStorageInfo(Map internalProperties) return S3CompatibleStorageConfigInfo.builder() .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE) .setS3Endpoint(s3Config.getS3Endpoint()) + .setS3ProfileName(s3Config.getS3ProfileName()) .setS3PathStyleAccess(s3Config.getS3PathStyleAccess()) .setAllowedLocations(s3Config.getAllowedLocations()) .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId()) @@ -275,6 +276,7 @@ public Builder setStorageConfigurationInfo( new S3CompatibleStorageConfigurationInfo( PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, s3ConfigModel.getS3Endpoint(), + s3ConfigModel.getS3ProfileName(), s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(), s3ConfigModel.getS3PathStyleAccess(), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java index 02cc2af126..6eb26a94df 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java @@ -20,6 +20,11 @@ import jakarta.annotation.Nonnull; import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import software.amazon.awssdk.policybuilder.iam.*; public class StorageUtil { /** @@ -62,4 +67,136 @@ public class StorageUtil { public static @Nonnull String getBucket(URI uri) { return uri.getAuthority(); } + + /** + * Given a path, return it without leading slash + * + * @param path A path to parse + * @return Same path without leading slash + */ + private static @Nonnull String trimLeadingSlash(String path) { + if (path.startsWith("/")) { + path = path.substring(1); + } + return path; + } + + 
/** + * Given an uri, and format an S3 path + * + * @param uri A path to parse + * @return A bucket and a path joined by slash + */ + private static @Nonnull String parseS3Path(URI uri) { + String bucket = getBucket(uri); + String path = trimLeadingSlash(uri.getPath()); + return String.join("/", bucket, path); + } + + /** + * Given a roleArn, return the prefix + * + * @param roleArn A roleArn to parse + * @return The prefix of the roleArn + */ + private static String getArnPrefixFor(String roleArn) { + if (roleArn.contains("aws-cn")) { + return "arn:aws-cn:s3:::"; + } else if (roleArn.contains("aws-us-gov")) { + return "arn:aws-us-gov:s3:::"; + } else { + return "arn:aws:s3:::"; + } + } + + /** + * generate an IamPolicy from the input readLocations and writeLocations, optionally with list + * support. Credentials will be scoped to exactly the resources provided. If read and write + * locations are empty, a non-empty policy will be generated that grants GetObject and optionally + * ListBucket privileges with no resources. This prevents us from sending an empty policy to AWS + * and just assuming the role with full privileges. 
+ * + * @param roleArn A roleArn + * @param allowList Allow list or not + * @param readLocations A list of input read locations + * @param writeLocations A list of input write locations + * @return A policy limiting scope access + */ + // TODO - add KMS key access + public static IamPolicy policyString( + String roleArn, boolean allowList, Set readLocations, Set writeLocations) { + IamPolicy.Builder policyBuilder = IamPolicy.builder(); + IamStatement.Builder allowGetObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + Map bucketListStatementBuilder = new HashMap<>(); + Map bucketGetLocationStatementBuilder = new HashMap<>(); + + String arnPrefix = getArnPrefixFor(roleArn); + Stream.concat(readLocations.stream(), writeLocations.stream()) + .distinct() + .forEach( + location -> { + URI uri = URI.create(location); + allowGetObjectStatementBuilder.addResource( + // TODO add support for CN and GOV + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + final var bucket = arnPrefix + StorageUtil.getBucket(uri); + if (allowList) { + bucketListStatementBuilder + .computeIfAbsent( + bucket, + (String key) -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(key)) + .addCondition( + IamConditionOperator.STRING_LIKE, + "s3:prefix", + StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); + } + bucketGetLocationStatementBuilder.computeIfAbsent( + bucket, + key -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(key)); + }); + + if (!writeLocations.isEmpty()) { + IamStatement.Builder allowPutObjectStatementBuilder = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:PutObject") + .addAction("s3:DeleteObject"); + writeLocations.forEach( + location -> { + URI uri = URI.create(location); + // TODO add support 
for CN and GOV + allowPutObjectStatementBuilder.addResource( + IamResource.create( + arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); + }); + policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); + } + if (!bucketListStatementBuilder.isEmpty()) { + bucketListStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + } else if (allowList) { + // add list privilege with 0 resources + policyBuilder.addStatement( + IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); + } + + bucketGetLocationStatementBuilder + .values() + .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); + return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); + } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 3dfb03814d..b1aebb4afd 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -21,12 +21,10 @@ import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import jakarta.annotation.Nonnull; +import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; -import java.util.HashMap; -import java.util.Map; import java.util.Set; -import java.util.stream.Stream; import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; import org.apache.polaris.core.context.RealmContext; @@ -36,12 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; -import software.amazon.awssdk.policybuilder.iam.IamEffect; -import software.amazon.awssdk.policybuilder.iam.IamPolicy; -import software.amazon.awssdk.policybuilder.iam.IamResource; -import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.profiles.ProfileFileSupplier; import software.amazon.awssdk.services.sts.StsClient; import software.amazon.awssdk.services.sts.StsClientBuilder; import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; @@ -60,7 +55,6 @@ public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore confi this.configurationStore = configurationStore; } - /** {@inheritDoc} */ @Override public EnumMap getSubscopedCreds( @Nonnull RealmContext realmContext, @@ -70,7 +64,6 @@ public EnumMap getSubscopedCreds( @Nonnull Set allowedReadLocations, @Nonnull Set allowedWriteLocations) { - StsClient stsClient; String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -85,136 +78,51 @@ public EnumMap getSubscopedCreds( } LOGGER.debug("S3Compatible - createStsClient()"); - try { - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); - stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); - if (caI != null && caS != null) { - // else default provider build credentials from profile or standard AWS env var - stsBuilder.credentialsProvider( - StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); - LOGGER.debug( - "S3Compatible - stsClient using keys from catalog settings - overiding default constructor"); - } - stsClient = stsBuilder.build(); + 
StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); + if (storageConfig.getS3ProfileName() != null) { + stsBuilder.credentialsProvider( + ProfileCredentialsProvider.builder() + .profileFile(ProfileFileSupplier.defaultSupplier()) + .profileName(storageConfig.getS3ProfileName()) + .build()); + LOGGER.debug("S3Compatible - stsClient using profile from catalog settings"); + } else if (caI != null && caS != null) { + stsBuilder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS))); + LOGGER.debug("S3Compatible - stsClient using keys from catalog settings"); + } + try (StsClient stsClient = stsBuilder.build()) { LOGGER.debug("S3Compatible - stsClient successfully built"); AssumeRoleResponse response = stsClient.assumeRole( AssumeRoleRequest.builder() .roleSessionName("PolarisCredentialsSTS") - .roleArn( - (storageConfig.getS3RoleArn() == null) ? 
"" : storageConfig.getS3RoleArn()) + .roleArn(storageConfig.getS3RoleArn()) .policy( - policyString(allowListOperation, allowedReadLocations, allowedWriteLocations) + StorageUtil.policyString( + storageConfig.getS3RoleArn(), + allowListOperation, + allowedReadLocations, + allowedWriteLocations) .toJson()) .durationSeconds( configurationStore.getConfiguration( realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); + propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); propertiesMap.put( PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); LOGGER.debug( - "S3Compatible - assumeRole - Token Expiration at : {}", + "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); - } catch (Exception e) { - System.err.println("S3Compatible - stsClient - build failure : " + e.getMessage()); + throw new NotAuthorizedException( + "Unable to build S3 Security Token Service client - " + e.getMessage()); } return propertiesMap; } - - /* - * function from AwsCredentialsStorageIntegration but without roleArn parameter - */ - private IamPolicy policyString( - boolean allowList, Set readLocations, Set writeLocations) { - IamPolicy.Builder policyBuilder = IamPolicy.builder(); - IamStatement.Builder allowGetObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetObject") - .addAction("s3:GetObjectVersion"); - Map bucketListStatementBuilder = new HashMap<>(); - Map bucketGetLocationStatementBuilder = new HashMap<>(); - - String arnPrefix = "arn:aws:s3:::"; - Stream.concat(readLocations.stream(), writeLocations.stream()) - .distinct() - .forEach( - location -> { - URI uri = URI.create(location); - allowGetObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), 
"*", "/"))); - final var bucket = arnPrefix + StorageUtil.getBucket(uri); - if (allowList) { - bucketListStatementBuilder - .computeIfAbsent( - bucket, - (String key) -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:ListBucket") - .addResource(key)) - .addCondition( - IamConditionOperator.STRING_LIKE, - "s3:prefix", - StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/")); - } - bucketGetLocationStatementBuilder.computeIfAbsent( - bucket, - key -> - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:GetBucketLocation") - .addResource(key)); - }); - - if (!writeLocations.isEmpty()) { - IamStatement.Builder allowPutObjectStatementBuilder = - IamStatement.builder() - .effect(IamEffect.ALLOW) - .addAction("s3:PutObject") - .addAction("s3:DeleteObject"); - writeLocations.forEach( - location -> { - URI uri = URI.create(location); - allowPutObjectStatementBuilder.addResource( - IamResource.create( - arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/"))); - }); - policyBuilder.addStatement(allowPutObjectStatementBuilder.build()); - } - if (!bucketListStatementBuilder.isEmpty()) { - bucketListStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - } else if (allowList) { - // add list privilege with 0 resources - policyBuilder.addStatement( - IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build()); - } - - bucketGetLocationStatementBuilder - .values() - .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build())); - return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build(); - } - - /* function from AwsCredentialsStorageIntegration */ - private static @Nonnull String parseS3Path(URI uri) { - String bucket = StorageUtil.getBucket(uri); - String path = trimLeadingSlash(uri.getPath()); - return String.join("/", bucket, path); - } - - /* function from 
AwsCredentialsStorageIntegration */ - private static @Nonnull String trimLeadingSlash(String path) { - if (path.startsWith("/")) { - path = path.substring(1); - } - return path; - } } diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 776279546a..76fe11008c 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; +import jakarta.annotation.Nonnull; import java.util.List; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; import org.jetbrains.annotations.NotNull; @@ -34,9 +35,10 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for subscoping creds. + // for allowed read and write locations for sub-scoping credentials. 
@JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; private final @NotNull String s3Endpoint; + private final @Nullable String s3ProfileName; private final @Nullable String s3CredentialsCatalogAccessKeyId; private final @Nullable String s3CredentialsCatalogSecretAccessKey; private final @NotNull Boolean s3PathStyleAccess; @@ -47,33 +49,39 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigur public S3CompatibleStorageConfigurationInfo( @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, - @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = true) @Nullable + @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName, + @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable String s3CredentialsCatalogAccessKeyId, - @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = true) @Nullable + @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable String s3CredentialsCatalogSecretAccessKey, @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull Boolean s3PathStyleAccess, @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, - @JsonProperty(value = "allowedLocations", required = true) @Nullable + @JsonProperty(value = "allowedLocations", required = true) @Nonnull List allowedLocations) { super(StorageType.S3_COMPATIBLE, allowedLocations); validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); this.s3PathStyleAccess = s3PathStyleAccess; this.s3Endpoint = s3Endpoint; + this.s3ProfileName = s3ProfileName; this.s3CredentialsCatalogAccessKeyId = (s3CredentialsCatalogAccessKeyId == null) ? 
"" : s3CredentialsCatalogAccessKeyId; this.s3CredentialsCatalogSecretAccessKey = (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey; this.s3Region = s3Region; - this.s3RoleArn = s3RoleArn; + this.s3RoleArn = (s3RoleArn == null) ? "" : s3RoleArn; } public @NotNull String getS3Endpoint() { return this.s3Endpoint; } + public @Nullable String getS3ProfileName() { + return this.s3ProfileName; + } + public @NotNull Boolean getS3PathStyleAccess() { return this.s3PathStyleAccess; } @@ -103,6 +111,7 @@ public String toString() { .add("s3RoleArn", getS3RoleArn()) .add("s3PathStyleAccess", getS3PathStyleAccess()) .add("s3Endpoint", getS3Endpoint()) + .add("s3ProfileName", getS3ProfileName()) .toString(); } diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql index 966ea6db62..0932af1ee7 100644 --- a/regtests/minio/queries-for-spark.sql +++ b/regtests/minio/queries-for-spark.sql @@ -29,14 +29,10 @@ SELECT * FROM db1.ns2.view1; INSERT INTO db1.ns1.table1 VALUES (13, 23); SELECT * FROM db1.ns2.view1; -CREATE DATABASE IF NOT EXISTS db1; -CREATE OR REPLACE TABLE db1.table1 ( f1 int, f2 int ); -INSERT INTO db1.ns1.table1 VALUES (3, 2); - -- Test the second bucket allowed in the catalog -CREATE DATABASE IF NOT EXISTS db2 LOCATION 's3://warehouse2/polaris/'; -CREATE OR REPLACE TABLE db2.table1 ( f1 int, f2 int ); -INSERT INTO db2.table1 VALUES (01, 02); -SELECT * FROM db2.table1; +CREATE DATABASE IF NOT EXISTS wh2 LOCATION 's3://warehouse2/polaris'; +CREATE OR REPLACE TABLE wh2.table1 ( f1 int, f2 int ); +INSERT INTO wh2.table1 VALUES (01, 02); +SELECT * FROM wh2.table1; quit; diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatible.sh index 172488b7b2..ebd490b58f 100755 --- a/regtests/run_spark_sql_s3compatible.sh +++ b/regtests/run_spark_sql_s3compatible.sh @@ -21,7 +21,7 @@ # Purpose: Launch the Spark SQL shell to interact with Polaris and do NRT. 
# ----------------------------------------------------------------------------- # -# Prequisite: +# Requisite: # This script use a MinIO with TLS. # Please follow instructions in regtests/minio/Readme.md and update your # java cacerts with self-signed certificate @@ -40,6 +40,7 @@ clear + if [ $# -ne 0 ] && [ $# -ne 1 ]; then echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" echo "Usage: ./run_spark_sql.sh [S3-location]" @@ -63,18 +64,20 @@ fi # Second location for testing catalog update S3_LOCATION_2="s3://warehouse2/polaris/" +# If Polaris run inMemory classic mode, principal credentials are : root:secret +# If Polaris run inMemory DEBUG mode, principal credentials are to retrieve from service log within this pattern: 522f251cc2b9c121:6eff0915385979684d575fa1d3f18e2b # SPARK_BEARER_TOKEN if ! output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ - -d "grant_type=client_credentials" \ - -d "client_id=root" \ - -d "client_secret=secret" \ - -d "scope=PRINCIPAL_ROLE:ALL"); then + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=secret" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then echo "Error: Failed to retrieve bearer token" exit 1 fi SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}') -if [ "SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then +if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then echo "Error: Failed to retrieve bearer token" exit 1 fi @@ -101,54 +104,54 @@ echo -e "\n\n-------\n\n" echo "Start a minio with secured self-signed buckets s3://warehouse and users, wait a moment please..." 
docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured -echo "minio brower is availaible during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" +echo "minio browser is available during this test in https://localhost:9001 admin/password (please accept the self signed certificate)" echo -e "\n\n-------\n\n" # spark setup -export SPARK_VERSION=spark-3.5.2 -export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 - echo "Doing spark setup... wait a moment" +export SPARK_VERSION=spark-3.5.4 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding ./setup.sh > /dev/null 2>&1 - -if [ -z "${SPARK_HOME}"]; then +if [ -z "${SPARK_HOME}" ]; then export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) fi - - -# start of tests - -# creation of catalog - echo """ -These environnement variables have to be available to Polaris service : -CATALOG_S3_KEY_ID = minio-user-catalog -CATALOG_S3_KEY_SECRET = 12345678-minio-catalog +These environment variables have to be available to Polaris service or as keys in the aws profile, and the name of this profile provided to the catalog as parameter : export CATALOG_S3_KEY_ID=minio-user-catalog export CATALOG_S3_KEY_SECRET=12345678-minio-catalog """ - -echo -e "\n----\nCREATE Catalog with few parameters \n" +echo Add minio-catalog-1 section in aws profile +cat >>~/.aws/credentials < /dev/stderr -echo -e "\n----\nAssign the catalog_admin to the service_admin.\n" +echo -e "\n\n---- Assign the catalog_admin to the service_admin.\n" curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ -d '{"name": "catalog_admin"}' > /dev/stderr -echo -e 
"\n----\nStart Spark-sql to test Polaris catalog with queries\n" +echo -e "\n\n---- Start Spark-sql to test Polaris catalog with queries\n" ${SPARK_HOME}/bin/spark-sql --verbose \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ @@ -210,9 +214,13 @@ ${SPARK_HOME}/bin/spark-sql --verbose \ -f "minio/queries-for-spark.sql" +echo Remove minio-catalog-1 section from aws profile +sed -i '/\[minio-catalog-1\]/,${/\[minio-catalog-1\]/d; d}' ~/.aws/credentials +echo Done. + +echo +echo End of tests, a table and a view data with displayed should be visible in log above +echo Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata' +echo ':-)' -echo -e "\n\n\nEnd of tests, a table and a view data with displayed should be visible in log above" -echo "Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata'" -echo ":-)" -echo "" -docker-compose --progress quiet --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down +docker-compose --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down \ No newline at end of file diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml index 370a62dc0d..2819b802c8 100644 --- a/spec/polaris-management-service.yml +++ b/spec/polaris-management-service.yml @@ -915,28 +915,33 @@ components: properties: s3.endpoint: type: string - description: the S3 endpoint + description: S3 endpoint example: "http[s]://host:port" + s3.profileName: + type: string + description: optional - S3 profile name (credentials) used by this catalog to communicate with S3 + example: "default or minio-storage-catalog-1 or ceph-storage-catalog-2" s3.credentials.catalog.accessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the 
environment variable name for the 'ACCESS_KEY_ID' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'ACCESS_KEY_ID' used by this catalog to communicate with S3 example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID" s3.credentials.catalog.secretAccessKeyEnvVar: type: string - description: Default to AWS credentials, otherwise set the environment variable name for the 'SECRET_ACCESS_KEY' used by the catalog to communicate with S3 + description: optional - environment variable name for the 'SECRET_ACCESS_KEY' used by this catalog to communicate with S3 example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY" s3.pathStyleAccess: type: boolean - description: Whether or not to use path-style access + description: optional - whether or not to use path-style access default: false s3.region: type: string - description: Optional - the s3 region where data is stored + description: optional - s3 region where data is stored example: "rack-1 or us-east-1" s3.roleArn: type: string - description: Optional - a s3 role arn - example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" + description: optional - s3 role arn, used with assumeRole to obtain a Security Token Service + pattern: '^([u|a]rn:\S*:\S*:\S*:\S*:\S*).*$' + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234 or urn:ecs:iam::namespace:user/role" required: - s3.endpoint From c000bc7e5e92ca0a38ad86c2ebcc817b14c84fe8 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Tue, 4 Mar 2025 22:35:37 +0100 Subject: [PATCH 09/17] rebase --- ...mpatibleCredentialsStorageIntegration.java | 24 +++++++------------ ...PolarisStorageIntegrationProviderImpl.java | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java 
b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index b1aebb4afd..02b6943117 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -19,18 +19,17 @@ package org.apache.polaris.core.storage.s3compatible; import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; +import static org.apache.polaris.core.PolarisConfiguration.loadConfig; -import jakarta.annotation.Nonnull; import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; import java.util.Set; -import org.apache.polaris.core.PolarisConfigurationStore; import org.apache.polaris.core.PolarisDiagnostics; -import org.apache.polaris.core.context.RealmContext; import org.apache.polaris.core.storage.InMemoryStorageIntegration; import org.apache.polaris.core.storage.PolarisCredentialProperty; import org.apache.polaris.core.storage.StorageUtil; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -48,21 +47,18 @@ public class S3CompatibleCredentialsStorageIntegration private static final Logger LOGGER = LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class); - private final PolarisConfigurationStore configurationStore; - public S3CompatibleCredentialsStorageIntegration(PolarisConfigurationStore configurationStore) { - super(configurationStore, S3CompatibleCredentialsStorageIntegration.class.getName()); - this.configurationStore = configurationStore; + public S3CompatibleCredentialsStorageIntegration() { + super(S3CompatibleCredentialsStorageIntegration.class.getName()); } @Override public EnumMap getSubscopedCreds( - @Nonnull RealmContext 
realmContext, - @Nonnull PolarisDiagnostics diagnostics, - @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, + @NotNull PolarisDiagnostics diagnostics, + @NotNull S3CompatibleStorageConfigurationInfo storageConfig, boolean allowListOperation, - @Nonnull Set allowedReadLocations, - @Nonnull Set allowedWriteLocations) { + @NotNull Set allowedReadLocations, + @NotNull Set allowedWriteLocations) { String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); @@ -106,9 +102,7 @@ public EnumMap getSubscopedCreds( allowedReadLocations, allowedWriteLocations) .toJson()) - .durationSeconds( - configurationStore.getConfiguration( - realmContext, STORAGE_CREDENTIAL_DURATION_SECONDS)) + .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, response.credentials().accessKeyId()); diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index 9c3aeedb1b..dff9037863 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -77,7 +77,7 @@ public PolarisStorageIntegrationProviderImpl( case S3_COMPATIBLE: storageIntegration = (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(configurationStore); + new S3CompatibleCredentialsStorageIntegration(); break; case GCS: storageIntegration = From 882b84f04b05197f16d293779ce79dc4c1bec2bf Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 00:05:03 +0100 Subject: [PATCH 10/17] format --- .../service/storage/PolarisStorageIntegrationProviderImpl.java | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java index dff9037863..3f7247b483 100644 --- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java +++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java @@ -76,8 +76,7 @@ public PolarisStorageIntegrationProviderImpl( break; case S3_COMPATIBLE: storageIntegration = - (PolarisStorageIntegration) - new S3CompatibleCredentialsStorageIntegration(); + (PolarisStorageIntegration) new S3CompatibleCredentialsStorageIntegration(); break; case GCS: storageIntegration = From eedee829fbaf9736cc01c4505c1f6f010853a8bf Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 18:12:39 +0100 Subject: [PATCH 11/17] Update polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java Co-authored-by: Yufei Gu --- .../S3CompatibleCredentialsStorageIntegration.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 02b6943117..61c5fbd78e 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -105,10 +105,9 @@ public EnumMap getSubscopedCreds( .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS)) .build()); - propertiesMap.put(PolarisCredentialProperty.AWS_KEY_ID, 
response.credentials().accessKeyId()); - propertiesMap.put( - PolarisCredentialProperty.AWS_SECRET_KEY, response.credentials().secretAccessKey()); - propertiesMap.put(PolarisCredentialProperty.AWS_TOKEN, response.credentials().sessionToken()); + propertiesMap.put(AWS_KEY_ID, response.credentials().accessKeyId()); + propertiesMap.put(AWS_SECRET_KEY, response.credentials().secretAccessKey()); + propertiesMap.put(AWS_TOKEN, response.credentials().sessionToken()); LOGGER.debug( "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); From ac951e3660852a9a2d6d93be012e91049c91a631 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 18:13:26 +0100 Subject: [PATCH 12/17] Update polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java Co-authored-by: Yufei Gu --- .../s3compatible/S3CompatibleCredentialsStorageIntegration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 61c5fbd78e..53bd7e1a81 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -113,7 +113,7 @@ public EnumMap getSubscopedCreds( response.credentials().expiration().toString()); } catch (Exception e) { throw new NotAuthorizedException( - "Unable to build S3 Security Token Service client - " + e.getMessage()); + "Unable to build S3 Security Token Service client", e); } return propertiesMap; From 7a97aecc8511349c7cdba93e42cc6f795c1da0d9 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 5 Mar 2025 19:32:47 +0100 Subject: 
[PATCH 13/17] tests --- .../polaris/core/entity/CatalogEntity.java | 1 - ...mpatibleCredentialsStorageIntegration.java | 8 +- .../S3CompatibleStorageConfigurationInfo.java | 3 +- ...ompatibleStorageConfigurationInfoTest.java | 23 ++++++ ...le.sh => run_spark_sql_s3compatibleTLS.sh} | 0 .../t_spark_sql/ref/spark_sql_minio.sh.ref | 36 +++++++++ regtests/t_spark_sql/src/spark_sql_minio.sh | 77 +++++++++++++++++++ 7 files changed, 142 insertions(+), 6 deletions(-) create mode 100644 polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java rename regtests/{run_spark_sql_s3compatible.sh => run_spark_sql_s3compatibleTLS.sh} (100%) create mode 100755 regtests/t_spark_sql/ref/spark_sql_minio.sh.ref create mode 100755 regtests/t_spark_sql/src/spark_sql_minio.sh diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java index d5ad54771f..82364e2603 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java @@ -274,7 +274,6 @@ public Builder setStorageConfigurationInfo( (S3CompatibleStorageConfigInfo) storageConfigModel; config = new S3CompatibleStorageConfigurationInfo( - PolarisStorageConfigurationInfo.StorageType.S3_COMPATIBLE, s3ConfigModel.getS3Endpoint(), s3ConfigModel.getS3ProfileName(), s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(), diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 53bd7e1a81..923fc04680 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ 
b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -20,6 +20,9 @@ import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; import static org.apache.polaris.core.PolarisConfiguration.loadConfig; +import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_KEY_ID; +import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_SECRET_KEY; +import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_TOKEN; import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; @@ -74,7 +77,7 @@ public EnumMap getSubscopedCreds( } LOGGER.debug("S3Compatible - createStsClient()"); - StsClientBuilder stsBuilder = software.amazon.awssdk.services.sts.StsClient.builder(); + StsClientBuilder stsBuilder = StsClient.builder(); stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint())); if (storageConfig.getS3ProfileName() != null) { stsBuilder.credentialsProvider( @@ -112,8 +115,7 @@ public EnumMap getSubscopedCreds( "S3Compatible - assumeRole - Obtained token expiration : {}", response.credentials().expiration().toString()); } catch (Exception e) { - throw new NotAuthorizedException( - "Unable to build S3 Security Token Service client", e); + throw new NotAuthorizedException("Unable to build S3 Security Token Service client", e); } return propertiesMap; diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 76fe11008c..807051a058 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -47,14 +47,13 @@ public class 
S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigur @JsonCreator public S3CompatibleStorageConfigurationInfo( - @JsonProperty(value = "storageType", required = true) @NotNull StorageType storageType, @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName, @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable String s3CredentialsCatalogAccessKeyId, @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable String s3CredentialsCatalogSecretAccessKey, - @JsonProperty(value = "s3PathStyleAccess", required = false) @NotNull + @JsonProperty(value = "s3PathStyleAccess", required = false, defaultValue = "false") @NotNull Boolean s3PathStyleAccess, @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, diff --git a/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java new file mode 100644 index 0000000000..5ba9d57ed0 --- /dev/null +++ b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.service.storage.s3compatible; + +public class S3CompatibleStorageConfigurationInfoTest { +} diff --git a/regtests/run_spark_sql_s3compatible.sh b/regtests/run_spark_sql_s3compatibleTLS.sh similarity index 100% rename from regtests/run_spark_sql_s3compatible.sh rename to regtests/run_spark_sql_s3compatibleTLS.sh diff --git a/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref b/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref new file mode 100755 index 0000000000..d1a453e299 --- /dev/null +++ b/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref @@ -0,0 +1,36 @@ +S3COMPATIBLE Starting test. 
(shell script called minio to avoid to be trapped by AWS test pattern based on s3 word) +{"defaults":{"default-base-location":"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog"},"overrides":{"prefix":"spark_sql_s3compatible_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","POST /v1/{prefix}/transactions/commit"]} +Catalog created +spark-sql (default)> use polaris; +spark-sql ()> show namespaces; +spark-sql ()> create namespace db1; +spark-sql ()> create namespace db2; +spark-sql ()> show namespaces; +db1 +db2 +spark-sql ()> + > create namespace db1.schema1; +spark-sql ()> show namespaces; +db1 +db2 +spark-sql ()> show namespaces in db1; +db1.schema1 +spark-sql ()> + > create table db1.schema1.tbl1 (col1 int); +spark-sql ()> show tables in db1; +spark-sql ()> use db1.schema1; +spark-sql (db1.schema1)> + > insert into tbl1 values (123), (234); +spark-sql (db1.schema1)> select * from tbl1; +123 +234 +spark-sql (db1.schema1)> + > drop table tbl1 purge; +spark-sql (db1.schema1)> show tables; +spark-sql (db1.schema1)> drop namespace db1.schema1; +spark-sql (db1.schema1)> drop 
namespace db1; +spark-sql (db1.schema1)> show namespaces; +db2 +spark-sql (db1.schema1)> drop namespace db2; +spark-sql (db1.schema1)> show namespaces; +spark-sql (db1.schema1)> diff --git a/regtests/t_spark_sql/src/spark_sql_minio.sh b/regtests/t_spark_sql/src/spark_sql_minio.sh new file mode 100755 index 0000000000..0ef3363b46 --- /dev/null +++ b/regtests/t_spark_sql/src/spark_sql_minio.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +#if [ -z "$S3COMPATIBLE_TEST_ENABLED" ] || [ "$S3COMPATIBLE_TEST_ENABLED" != "true" ]; then +# echo "S3COMPATIBLE_TEST_ENABLED is not set to 'true'. Skipping test." +# exit 0 +#fi +echo "S3COMPATIBLE Starting test." 
+ +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" + +curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d "{\"name\": \"spark_sql_s3compatible_catalog\", \"id\": 100, \"type\": \"INTERNAL\", \"readOnly\": false, \"properties\": {\"default-base-location\": \"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog\"}, \"storageConfigInfo\": {\"storageType\": \"S3_COMPATIBLE\", \"allowedLocations\": [\"s3://warehouse/polaris_test/\"], \"s3.endpoint\": \"http://localhost:9000\", \"s3.pathStyleAccess\": true, \"s3.credentials.catalog.accessKeyEnvVar\": \"MINIO_S3_CATALOG_1_ID\", \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"MINIO_S3_CATALOG_1_SECRET\" }}" > /dev/stderr + + +# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/spark_sql_s3compatible_catalog/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + +# For now, also explicitly assign the catalog_admin to the service_admin. Remove once GS fully rolled out for auto-assign. 
+curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/spark_sql_s3compatible_catalog \ + -d '{"name": "catalog_admin"}' > /dev/stderr + +curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=spark_sql_s3compatible_catalog" +echo +echo "Catalog created" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=spark_sql_s3compatible_catalog +use polaris; +show namespaces; +create namespace db1; +create namespace db2; +show namespaces; + +create namespace db1.schema1; +show namespaces; +show namespaces in db1; + +create table db1.schema1.tbl1 (col1 int); +show tables in db1; +use db1.schema1; + +insert into tbl1 values (123), (234); +select * from tbl1; + +drop table tbl1 purge; +show tables; +drop namespace db1.schema1; +drop namespace db1; +show namespaces; +drop namespace db2; +show namespaces; +EOF + +curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/spark_sql_s3compatible_catalog > /dev/stderr From 006baafff51f869517fd253d583c0eafc6ba3589 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Fri, 7 Mar 2025 23:23:55 +0100 Subject: [PATCH 14/17] tests --- ...mpatibleCredentialsStorageIntegration.java | 10 +++--- .../S3CompatibleStorageConfigurationInfo.java | 15 ++++----- ...ompatibleStorageConfigurationInfoTest.java | 31 +++++++++++++++++- regtests/README.md | 4 +-- regtests/docker-compose.yml | 32 +++++++++++++++++++ regtests/minio/docker-compose.yml | 1 + regtests/run_spark_sql_s3compatibleTLS.sh | 6 ++-- 
regtests/t_spark_sql/src/spark_sql_minio.sh | 4 +-- 8 files changed, 82 insertions(+), 21 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 923fc04680..3c7d321295 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -24,6 +24,7 @@ import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_SECRET_KEY; import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_TOKEN; +import jakarta.annotation.Nonnull; import jakarta.ws.rs.NotAuthorizedException; import java.net.URI; import java.util.EnumMap; @@ -32,7 +33,6 @@ import org.apache.polaris.core.storage.InMemoryStorageIntegration; import org.apache.polaris.core.storage.PolarisCredentialProperty; import org.apache.polaris.core.storage.StorageUtil; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -57,11 +57,11 @@ public S3CompatibleCredentialsStorageIntegration() { @Override public EnumMap getSubscopedCreds( - @NotNull PolarisDiagnostics diagnostics, - @NotNull S3CompatibleStorageConfigurationInfo storageConfig, + @Nonnull PolarisDiagnostics diagnostics, + @Nonnull S3CompatibleStorageConfigurationInfo storageConfig, boolean allowListOperation, - @NotNull Set allowedReadLocations, - @NotNull Set allowedWriteLocations) { + @Nonnull Set allowedReadLocations, + @Nonnull Set allowedWriteLocations) { String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId()); String caS = 
System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey()); diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 807051a058..283b64ddb9 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -23,10 +23,9 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; import java.util.List; import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo; -import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; /** * S3-Compatible Storage Configuration. This class holds the parameters needed to connect to @@ -37,23 +36,23 @@ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigur // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required // for allowed read and write locations for sub-scoping credentials. 
@JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; - private final @NotNull String s3Endpoint; + private final @Nonnull String s3Endpoint; private final @Nullable String s3ProfileName; private final @Nullable String s3CredentialsCatalogAccessKeyId; private final @Nullable String s3CredentialsCatalogSecretAccessKey; - private final @NotNull Boolean s3PathStyleAccess; + private final @Nonnull Boolean s3PathStyleAccess; private final @Nullable String s3Region; private final @Nullable String s3RoleArn; @JsonCreator public S3CompatibleStorageConfigurationInfo( - @JsonProperty(value = "s3Endpoint", required = true) @NotNull String s3Endpoint, + @JsonProperty(value = "s3Endpoint", required = true) @Nonnull String s3Endpoint, @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName, @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable String s3CredentialsCatalogAccessKeyId, @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable String s3CredentialsCatalogSecretAccessKey, - @JsonProperty(value = "s3PathStyleAccess", required = false, defaultValue = "false") @NotNull + @JsonProperty(value = "s3PathStyleAccess", required = false, defaultValue = "false") @Nonnull Boolean s3PathStyleAccess, @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region, @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn, @@ -73,7 +72,7 @@ public S3CompatibleStorageConfigurationInfo( this.s3RoleArn = (s3RoleArn == null) ? 
"" : s3RoleArn; } - public @NotNull String getS3Endpoint() { + public @Nonnull String getS3Endpoint() { return this.s3Endpoint; } @@ -81,7 +80,7 @@ public S3CompatibleStorageConfigurationInfo( return this.s3ProfileName; } - public @NotNull Boolean getS3PathStyleAccess() { + public @Nonnull Boolean getS3PathStyleAccess() { return this.s3PathStyleAccess; } diff --git a/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java index 5ba9d57ed0..6e1331172b 100644 --- a/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java +++ b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java @@ -16,8 +16,37 @@ * specific language governing permissions and limitations * under the License. 
*/ - package org.apache.polaris.service.storage.s3compatible; +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo; +import org.junit.jupiter.api.Test; + public class S3CompatibleStorageConfigurationInfoTest { + + @Test + public void testS3CompatibleStorageConfigurationInfo() { + String warehouseDir = "s3://bucket/path/to/warehouse"; + S3CompatibleStorageConfigurationInfo conf = + new S3CompatibleStorageConfigurationInfo( + "http://localhost:9000", + null, + "MINIO_S3_CATALOG_1_ID", + "MINIO_S3_CATALOG_1_SECRET", + true, + null, + null, + List.of(warehouseDir)); + assertThat(conf).isNotNull(); + assertThat(conf.getS3Endpoint()).isEqualTo("http://localhost:9000"); + assertThat(conf.getS3ProfileName()).isNull(); + assertThat(conf.getS3CredentialsCatalogAccessKeyId()).isEqualTo("MINIO_S3_CATALOG_1_ID"); + assertThat(conf.getS3CredentialsCatalogSecretAccessKey()) + .isEqualTo("MINIO_S3_CATALOG_1_SECRET"); + assertThat(conf.getS3PathStyleAccess()).isTrue(); + assertThat(conf.getS3Region()).isNull(); + assertThat(conf.getS3RoleArn()).isEqualTo(""); + } } diff --git a/regtests/README.md b/regtests/README.md index 1fb1bbcdb5..aba96be6a2 100644 --- a/regtests/README.md +++ b/regtests/README.md @@ -40,7 +40,7 @@ follows: ```shell ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache -docker compose -f ./regtests/docker-compose.yml up --build --exit-code-from regtest +docker-compose -f ./regtests/docker-compose.yml up --build --exit-code-from regtest ``` In this setup, a Polaris container will be started in a docker-compose group, using the image @@ -205,4 +205,4 @@ and download all of the test dependencies into it. From here, `run.sh` will be a To debug, setup IntelliJ to point at your virtual environment to find your test dependencies (see https://www.jetbrains.com/help/idea/configuring-python-sdk.html). 
Then run the test in your IDE. -The above is handled automatically when running reg tests from the docker image. \ No newline at end of file +The above is handled automatically when running reg tests from the docker image. diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index 94a0f7502b..27b40b7d3a 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -31,6 +31,8 @@ services: AZURE_TENANT_ID: $AZURE_TENANT_ID AZURE_CLIENT_ID: $AZURE_CLIENT_ID AZURE_CLIENT_SECRET: $AZURE_CLIENT_SECRET + MINIO_S3_CATALOG_1_ID: minio-user-catalog + MINIO_S3_CATALOG_1_SECRET: 12345678-minio-catalog POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,secret quarkus.log.file.enable: "false" quarkus.otel.sdk.disabled: "true" @@ -70,3 +72,33 @@ services: volumes: - ./output:/tmp/polaris-regtests/ - ./credentials:/tmp/credentials/ + minio-without-tls: + image: minio/minio:latest + container_name: minio-without-tls + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + ports: + - 9000:9000 + volumes: + - ./minio/miniodata:/data + command: ["server", "/data"] + minio-configured-without-tls: + depends_on: + - minio-without-tls + image: minio/mc:latest + container_name: minio-configured-without-tls + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio http://minio-without-tls:9000 admin password) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc rm -r --force --quiet minio/warehouse; + /usr/bin/mc mb --ignore-existing minio/warehouse; + /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog; + /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog; + tail -f /dev/null + " diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml index ff6a5c0a72..0c528d94c6 100644 --- a/regtests/minio/docker-compose.yml +++ b/regtests/minio/docker-compose.yml @@ -29,6 +29,7 @@ services: minio_net: aliases: - warehouse.minio + - warehouse2.minio ports: - 9001:9001 - 9000:9000 diff --git a/regtests/run_spark_sql_s3compatibleTLS.sh b/regtests/run_spark_sql_s3compatibleTLS.sh index ebd490b58f..3f2dddd4e6 100755 --- a/regtests/run_spark_sql_s3compatibleTLS.sh +++ b/regtests/run_spark_sql_s3compatibleTLS.sh @@ -27,7 +27,7 @@ # java cacerts with self-signed certificate # # Usage: -# ./run_spark_sql_s3compatible.sh [S3-location] +# ./run_spark_sql_s3compatibleTLS.sh [S3-location] # # Description: # - Without arguments: Runs against default minio bucket s3://warehouse/polaris @@ -36,13 +36,13 @@ # # Examples: # - Run against AWS S3_COMPATIBLE: -# ./run_spark_sql_s3compatible.sh s3://warehouse/polaris +# ./run_spark_sql_s3compatibleTLS.sh s3://warehouse/polaris clear if [ $# -ne 0 ] && [ $# -ne 1 ]; then - echo "run_spark_sql_s3compatible.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" + echo "run_spark_sql_s3compatibleTLS.sh only accepts 1 or 0 argument, argument is the the bucket, by default it will be s3://warehouse/polaris" echo "Usage: ./run_spark_sql.sh [S3-location]" exit 1 fi diff --git a/regtests/t_spark_sql/src/spark_sql_minio.sh b/regtests/t_spark_sql/src/spark_sql_minio.sh index 0ef3363b46..b1c3cf6911 100755 --- a/regtests/t_spark_sql/src/spark_sql_minio.sh +++ b/regtests/t_spark_sql/src/spark_sql_minio.sh @@ -23,13 +23,13 @@ # echo "S3COMPATIBLE_TEST_ENABLED is not 
set to 'true'. Skipping test." # exit 0 #fi -echo "S3COMPATIBLE Starting test." +echo "S3COMPATIBLE Starting test. (shell script called minio to avoid to be trapped by AWS test pattern based on s3 word)" SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ - -d "{\"name\": \"spark_sql_s3compatible_catalog\", \"id\": 100, \"type\": \"INTERNAL\", \"readOnly\": false, \"properties\": {\"default-base-location\": \"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog\"}, \"storageConfigInfo\": {\"storageType\": \"S3_COMPATIBLE\", \"allowedLocations\": [\"s3://warehouse/polaris_test/\"], \"s3.endpoint\": \"http://localhost:9000\", \"s3.pathStyleAccess\": true, \"s3.credentials.catalog.accessKeyEnvVar\": \"MINIO_S3_CATALOG_1_ID\", \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"MINIO_S3_CATALOG_1_SECRET\" }}" > /dev/stderr + -d "{\"name\": \"spark_sql_s3compatible_catalog\", \"id\": 100, \"type\": \"INTERNAL\", \"readOnly\": false, \"properties\": {\"default-base-location\": \"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog\"}, \"storageConfigInfo\": {\"storageType\": \"S3_COMPATIBLE\", \"allowedLocations\": [\"s3://warehouse/polaris_test/\"], \"s3.endpoint\": \"http://minio-without-tls:9000\", \"s3.pathStyleAccess\": true, \"s3.credentials.catalog.accessKeyEnvVar\": \"MINIO_S3_CATALOG_1_ID\", \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"MINIO_S3_CATALOG_1_SECRET\" }}" > /dev/stderr # Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata From ca04dc1b50ba4f05aa86aec5dbc35a975da22344 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Mon, 10 Mar 2025 19:45:32 +0100 Subject: [PATCH 15/17] MAX_ALLOWED_LOCATIONS removed --- .../s3compatible/S3CompatibleStorageConfigurationInfo.java | 5 ----- 1 file 
changed, 5 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java index 283b64ddb9..e4e4cfd048 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java @@ -19,7 +19,6 @@ package org.apache.polaris.core.storage.s3compatible; import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; import jakarta.annotation.Nonnull; @@ -33,9 +32,6 @@ */ public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo { - // 5 is the approximate max allowed locations for the size of AccessPolicy when LIST is required - // for allowed read and write locations for sub-scoping credentials. 
- @JsonIgnore private static final int MAX_ALLOWED_LOCATIONS = 5; private final @Nonnull String s3Endpoint; private final @Nullable String s3ProfileName; private final @Nullable String s3CredentialsCatalogAccessKeyId; @@ -60,7 +56,6 @@ public S3CompatibleStorageConfigurationInfo( List allowedLocations) { super(StorageType.S3_COMPATIBLE, allowedLocations); - validateMaxAllowedLocations(MAX_ALLOWED_LOCATIONS); this.s3PathStyleAccess = s3PathStyleAccess; this.s3Endpoint = s3Endpoint; this.s3ProfileName = s3ProfileName; From 025ac8cb1ec0d645caeda3947c8734059794268c Mon Sep 17 00:00:00 2001 From: lefebsy Date: Tue, 11 Mar 2025 19:25:32 +0100 Subject: [PATCH 16/17] helm main restoration --- helm/polaris/tests/configmap_test.yaml | 286 +++++++++++++------------ helm/polaris/values.yaml | 273 +++++++++++++++++------ 2 files changed, 355 insertions(+), 204 deletions(-) diff --git a/helm/polaris/tests/configmap_test.yaml b/helm/polaris/tests/configmap_test.yaml index e070bf0dcf..ef725ec4f3 100644 --- a/helm/polaris/tests/configmap_test.yaml +++ b/helm/polaris/tests/configmap_test.yaml @@ -183,141 +183,159 @@ tests: set: logging: { file: { enabled: true, json: true }, console: { enabled: true, json: true } } asserts: - - equal: - path: data - value: - polaris-server.yml: |- - authenticator: - class: org.apache.polaris.service.auth.TestInlineBearerTokenPolarisAuthenticator - callContextResolver: - type: default - cors: - allowed-credentials: true - allowed-headers: - - '*' - allowed-methods: - - PATCH - - POST - - DELETE - - GET - - PUT - allowed-origins: - - http://localhost:8080 - allowed-timing-origins: - - http://localhost:8080 - exposed-headers: - - '*' - preflight-max-age: 600 - defaultRealms: - - default-realm - featureConfiguration: - ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - SUPPORTED_CATALOG_STORAGE_TYPES: - - S3 - - S3_COMPATIBLE - - GCS - - AZURE - - FILE - io: - factoryType: default - logging: - appenders: - - logFormat: '%-5p [%d{ISO8601} - 
%-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] - %c{30}: %m %kvp%n%ex' - threshold: ALL - type: console - level: INFO - loggers: - org.apache.iceberg.rest: DEBUG - org.apache.polaris: DEBUG - maxRequestBodyBytes: -1 - metaStoreManager: - type: in-memory - oauth2: - type: test - rateLimiter: - type: no-op - realmContextResolver: - type: default - server: - adminConnectors: - - port: 8182 - type: http - applicationConnectors: - - port: 8181 - type: http - maxThreads: 200 - minThreads: 10 - requestLog: - appenders: - - type: console - - it: should set config map data (auto sorted) + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.enable=true" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.enable=true" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.file.json=true" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.console.json=true" } + + - it: should include logging categories + set: + logging: + categories: + # compact style + org.acme: DEBUG + # expanded style + org: + acme: + service: INFO + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme\".level=DEBUG" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.log.category.\"org.acme.service\".level=INFO" } + + - it: should include MDC context + set: + logging: + mdc: + # compact style + org.acme: foo + # expanded style + org: + acme: + service: foo + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme\"=foo" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.log.mdc.\"org.acme.service\"=foo" } + + - it: should include telemetry configuration + set: + tracing: { enabled: true, endpoint: http://custom:4317, attributes: { service.name: custom, foo: bar } } + asserts: + - matchRegex: { path: 
'data["application.properties"]', pattern: "quarkus.otel.exporter.otlp.endpoint=http://custom:4317" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=service.name=custom" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.resource.attributes\\[\\d\\]=foo=bar" } + + - it: should include set sample rate numeric + set: + tracing: { enabled: true, sample: "0.123" } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_traceidratio" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler.arg=0.123" } + + - it: should include set sample rate "all" + set: + tracing: { enabled: true, sample: "all" } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=parentbased_always_on" } + + - it: should include set sample rate "none" + set: + tracing: { enabled: true, sample: "none" } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.traces.sampler=always_off" } + + - it: should disable tracing by default + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } + + - it: should disable tracing + set: + tracing: { enabled: false } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.otel.sdk.disabled=true" } + + - it: should include custom metrics + set: + metrics: { enabled: true, tags: { app: custom, foo: bar } } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.app=custom" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.metrics.tags.foo=bar" } + + - it: should disable metrics + set: + metrics: { enabled: false } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.micrometer.enabled=false" 
} + + - it: should include advanced configuration + set: + advancedConfig: + # compact style + quarkus.compact.custom: true + # expanded style + quarkus: + expanded: + custom: foo + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.compact.custom=true" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.expanded.custom=foo" } + + - it: should not include CORS configuration by default + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors" } + not: true + + - it: should include CORS configuration if defined + set: + cors: { allowedOrigins: [ "http://localhost:3000", "https://localhost:4000" ], allowedMethods: [ "GET", "POST" ], allowedHeaders: [ "X-Custom1", "X-Custom2" ], exposedHeaders: [ "X-Exposed-Custom1", "X-Exposed-Custom2" ], accessControlMaxAge: "PT1H", accessControlAllowCredentials: false } + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.origins=http://localhost:3000,https://localhost:4000" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.methods=GET,POST" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.headers=X-Custom1,X-Custom2" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.exposed-headers=X-Exposed-Custom1,X-Exposed-Custom2" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-max-age=PT1H" } + - matchRegex: { path: 'data["application.properties"]', pattern: "quarkus.http.cors.access-control-allow-credentials=false" } + + - it: should configure rate-limiter with default values + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } + + - it: should configure rate-limiter no-op + set: + rateLimiter.type: no-op + asserts: + - matchRegex: { path: 
'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=no-op" } + + - it: should configure rate-limiter with default token bucket values + set: + rateLimiter.type: default + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=default" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=default" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=9999" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT10S" } + + - it: should configure rate-limiter with custom token bucket values + set: + rateLimiter: + type: custom + tokenBucket: + type: custom + requestsPerSecond: 1234 + window: PT5S + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.filter.type=custom" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.type=custom" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.requests-per-second=1234" } + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.rate-limiter.token-bucket.window=PT5S" } + + - it: should not include tasks configuration by default + asserts: + - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks" } + not: true + + - it: should include tasks configuration if defined set: - polarisServerConfig: - server: - maxThreads: 200 - minThreads: 10 - applicationConnectors: - - type: http - port: 8181 - adminConnectors: - - type: http - port: 8182 - requestLog: - appenders: - - type: console - featureConfiguration: - ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - SUPPORTED_CATALOG_STORAGE_TYPES: - - S3 - callContextResolver: - type: default - realmContextResolver: - type: 
default - defaultRealms: - - default-realm - metaStoreManager: - type: eclipse-link - persistence-unit: polaris - conf-file: /eclipselink-config/conf.jar!/persistence.xml - io: - factoryType: default - oauth2: - type: default - tokenBroker: - type: symmetric-key - secret: polaris - authenticator: - class: org.apache.polaris.service.auth.DefaultPolarisAuthenticator - cors: - allowed-origins: - - http://localhost:8080 - allowed-timing-origins: - - http://localhost:8080 - allowed-methods: - - PATCH - - POST - - DELETE - - GET - - PUT - allowed-headers: - - "*" - exposed-headers: - - "*" - preflight-max-age: 600 - allowed-credentials: true - logging: - level: INFO - loggers: - org.apache.iceberg.rest: INFO - org.apache.polaris: INFO - appenders: - - type: console - threshold: ALL - logFormat: "%-5p [%d{ISO8601} - %-6r] [%t] [%X{aid}%X{sid}%X{tid}%X{wid}%X{oid}%X{srv}%X{job}%X{rid}] %c{30}: %m %kvp%n%ex" - maxRequestBodyBytes: -1 - rateLimiter: - type: no-op + tasks: { maxConcurrentTasks: 10, maxQueuedTasks: 20 } asserts: - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-concurrent-tasks=10" } - matchRegex: { path: 'data["application.properties"]', pattern: "polaris.tasks.max-queued-tasks=20" } diff --git a/helm/polaris/values.yaml b/helm/polaris/values.yaml index a6d7f3180f..7713d8ca2c 100644 --- a/helm/polaris/values.yaml +++ b/helm/polaris/values.yaml @@ -359,76 +359,209 @@ extraEnv: # name: aws-secret # key: secret_access_key -# -- Configures whether to enable the bootstrap metastore manager job -bootstrapMetastoreManager: false - -# -- Extra environment variables to add to the bootstrap metastore manager job (see `extraEnv` for an example) -bootstrapExtraEnv: [] - -# -- The secret name to pull persistence.xml from (ensure the key name is 'persistence.xml') -persistenceConfigSecret: ~ - -# -- Configures for polaris-server.yml -polarisServerConfig: - server: - # Maximum number of threads. 
- maxThreads: 200 - - # Minimum number of thread to keep alive. - minThreads: 10 - applicationConnectors: - # HTTP-specific options. - - type: http - - # The port on which the HTTP server listens for service requests. - port: 8181 - - adminConnectors: - - type: http - port: 8182 - - # The hostname of the interface to which the HTTP server socket wil be found. If omitted, the - # socket will listen on all interfaces. - # bindHost: localhost - - # ssl: - # keyStore: ./example.keystore - # keyStorePassword: example - # - # keyStoreType: JKS # (optional, JKS is default) - - # HTTP request log settings - requestLog: - appenders: - # Settings for logging to stdout. - - type: console - - # # Settings for logging to a file. - # - type: file - - # # The file to which statements will be logged. - # currentLogFilename: ./logs/request.log - - # # When the log file rolls over, the file will be archived to requests-2012-03-15.log.gz, - # # requests.log will be truncated, and new statements written to it. - # archivedLogFilenamePattern: ./logs/requests-%d.log.gz - - # # The maximum number of log files to archive. - # archivedFileCount: 14 - - # # Enable archiving if the request log entries go to the their own file - # archive: true - - featureConfiguration: - ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false - SUPPORTED_CATALOG_STORAGE_TYPES: - - S3 - - S3_COMPATIBLE - - GCS - - AZURE - - FILE - - callContextResolver: +# -- Extra volumes to add to the polaris pod. See https://kubernetes.io/docs/concepts/storage/volumes/. +extraVolumes: [] + # - name: extra-volume + # emptyDir: {} + +# -- Extra volume mounts to add to the polaris container. See https://kubernetes.io/docs/concepts/storage/volumes/. +extraVolumeMounts: [] + # - name: extra-volume + # mountPath: /usr/share/extra-volume + +# -- Add additional init containers to the polaris pod(s) See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/. 
+extraInitContainers: [] + # - name: your-image-name + # image: your-image + # imagePullPolicy: Always + # command: ['sh', '-c', 'echo "hello world"'] + +tracing: + # -- Specifies whether tracing for the polaris server should be enabled. + enabled: false + # -- The collector endpoint URL to connect to (required). + # The endpoint URL must have either the http:// or the https:// scheme. + # The collector must talk the OpenTelemetry protocol (OTLP) and the port must be its gRPC port (by default 4317). + # See https://quarkus.io/guides/opentelemetry for more information. + endpoint: "http://otlp-collector:4317" + # -- Which requests should be sampled. Valid values are: "all", "none", or a ratio between 0.0 and + # "1.0d" (inclusive). E.g. "0.5d" means that 50% of the requests will be sampled. + # Note: avoid entering numbers here, always prefer a string representation of the ratio. + sample: "1.0d" + # -- Resource attributes to identify the polaris service among other tracing sources. + # See https://opentelemetry.io/docs/reference/specification/resource/semantic_conventions/#service. + # If left empty, traces will be attached to a service named "Apache Polaris"; to change this, + # provide a service.name attribute here. + attributes: + {} + # service.name: my-polaris + +metrics: + # -- Specifies whether metrics for the polaris server should be enabled. + enabled: true + # -- Additional tags (dimensional labels) to add to the metrics. + tags: + {} + # service: polaris + # environment: production + +serviceMonitor: + # -- Specifies whether a ServiceMonitor for Prometheus operator should be created. + enabled: true + # -- The scrape interval; leave empty to let Prometheus decide. Must be a valid duration, e.g. 1d, 1h30m, 5m, 10s. + interval: "" + # -- Labels for the created ServiceMonitor so that Prometheus operator can properly pick it up. + labels: + {} + # release: prometheus + # -- Relabeling rules to apply to metrics. 
Ref https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config. + metricRelabelings: [] + # - source_labels: [ __meta_kubernetes_namespace ] + # separator: ; + # regex: (.*) + # target_label: namespace + # replacement: $1 + # action: replace + +# -- Logging configuration. +logging: + # -- The log level of the root category, which is used as the default log level for all categories. + level: INFO + # -- The header name to use for the request ID. + requestIdHeaderName: Polaris-Request-Id + # -- Configuration for the console appender. + console: + # -- Whether to enable the console appender. + enabled: true + # -- The log level of the console appender. + threshold: ALL + # -- Whether to log in JSON format. + json: false + # -- The log format to use. Ignored if JSON format is enabled. See + # https://quarkus.io/guides/logging#logging-format for details. + format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" + # -- Configuration for the file appender. + file: + # -- Whether to enable the file appender. + enabled: false + # -- The log level of the file appender. + threshold: ALL + # -- Whether to log in JSON format. + json: false + # -- The log format to use. Ignored if JSON format is enabled. See + # https://quarkus.io/guides/logging#logging-format for details. + format: "%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c{3.}] [%X{requestId},%X{realmId}] [%X{traceId},%X{parentId},%X{spanId},%X{sampled}] (%t) %s%e%n" + # -- The local directory where log files are stored. The persistent volume claim will be mounted + # here. + logsDir: /deployments/logs + # -- The log file name. + fileName: polaris.log + # -- Log rotation configuration. + rotation: + # -- The maximum size of the log file before it is rotated. Should be expressed as a Kubernetes quantity. + maxFileSize: 100Mi + # -- The maximum number of backup files to keep. 
+ maxBackupIndex: 5 + # -- An optional suffix to append to the rotated log files. If present, the rotated log files + # will be grouped in time buckets, and each bucket will contain at most maxBackupIndex files. + # The suffix must be in a date-time format that is understood by DateTimeFormatter. If the + # suffix ends with .gz or .zip, the rotated files will also be compressed using the + # corresponding algorithm. + fileSuffix: ~ # .yyyy-MM-dd.gz + # -- The log storage configuration. A persistent volume claim will be created using these + # settings. + storage: + # -- The storage class name of the persistent volume claim to create. + className: standard + # -- The size of the persistent volume claim to create. + size: 512Gi + # -- Labels to add to the persistent volume claim spec selector; a persistent volume with + # matching labels must exist. Leave empty if using dynamic provisioning. + selectorLabels: {} + # app.kubernetes.io/name: polaris + # app.kubernetes.io/instance: RELEASE-NAME + # -- Configuration for specific log categories. + categories: + org.apache.polaris: INFO + org.apache.iceberg.rest: INFO + # Useful to debug configuration issues: + # io.smallrye.config: DEBUG + # -- Configuration for MDC (Mapped Diagnostic Context). Values specified here will be added to the + # log context of all incoming requests and can be used in log patterns. + mdc: + # aid=polaris + # sid=polaris-service + {} + +# -- Realm context resolver configuration. +realmContext: + # -- The type of realm context resolver to use. Two built-in types are supported: default and test; + # test is not recommended for production as it does not perform any realm validation. + type: default + # -- List of valid realms, for use with the default realm context resolver. The first realm in + # the list is the default realm. Realms not in this list will be rejected. + realms: + - POLARIS + +# -- Polaris features configuration. +features: + # -- Features to enable or disable globally. 
If a feature is not present in the map, the default + # built-in value is used. + defaults: {} + # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: false + # SUPPORTED_CATALOG_STORAGE_TYPES: + # - S3 + # - GCS + # - AZURE + # - FILE + # -- Features to enable or disable per realm. This field is a map of maps. The realm name is the key, and the value is a map of + # feature names to values. If a feature is not present in the map, the default value from the 'defaults' field is used. + realmOverrides: {} + # my-realm: + # ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING: true + +# -- Polaris persistence configuration. +persistence: + # -- The type of persistence to use. Two built-in types are supported: in-memory and eclipse-link. + type: eclipse-link # in-memory + # -- The configuration for the eclipse-link persistence manager. + eclipseLink: + # -- The secret name to pull persistence.xml from. + secret: + # -- The name of the secret to pull persistence.xml from. + # If not provided, the default built-in persistence.xml will be used. This is probably not what you want. + name: ~ + # -- The key in the secret to pull persistence.xml from. + key: persistence.xml + # -- The persistence unit name to use. + persistenceUnit: polaris + +# -- Polaris FileIO configuration. +fileIo: + # -- The type of file IO to use. Two built-in types are supported: default and wasb. The wasb one translates WASB paths to ABFS ones. + type: default + +# -- Storage credentials for the server. If the following properties are unset, default +# credentials will be used, in which case the pod must have the necessary permissions to access the storage. +storage: + # -- The secret to pull storage credentials from. + secret: + # -- The name of the secret to pull storage credentials from. + name: ~ + # -- The key in the secret to pull the AWS access key ID from. Only required when using AWS. + awsAccessKeyId: ~ + # -- The key in the secret to pull the AWS secret access key from. 
Only required when using AWS. + awsSecretAccessKey: ~ + # -- The key in the secret to pull the GCP token from. Only required when using GCP. + gcpToken: ~ + # -- The key in the secret to pull the GCP token expiration time from. Only required when using GCP. Must be a valid ISO 8601 duration. The default is PT1H (1 hour). + gcpTokenLifespan: ~ + +# -- Polaris authentication configuration. +authentication: + # -- The type of authentication to use. Two built-in types are supported: default and test; + # test is not recommended for production. + authenticator: type: default # -- The type of token service to use. Two built-in types are supported: default and test; # test is not recommended for production. From 68010791626b54e2d7e4e26e4ebedb1d96cffa49 Mon Sep 17 00:00:00 2001 From: lefebsy Date: Wed, 12 Mar 2025 22:14:00 +0100 Subject: [PATCH 17/17] rebase to resole conflict --- .../S3CompatibleCredentialsStorageIntegration.java | 4 ++-- regtests/docker-compose.yml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java index 3c7d321295..674c735fd4 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java @@ -18,8 +18,8 @@ */ package org.apache.polaris.core.storage.s3compatible; -import static org.apache.polaris.core.PolarisConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; -import static org.apache.polaris.core.PolarisConfiguration.loadConfig; +import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS; +import static org.apache.polaris.core.config.PolarisConfiguration.loadConfig; 
import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_KEY_ID; import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_SECRET_KEY; import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_TOKEN; diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index 27b40b7d3a..16572db988 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -51,6 +51,8 @@ services: depends_on: polaris: condition: service_healthy + minio-configured-without-tls: + condition: service_started environment: AWS_TEST_ENABLED: $AWS_TEST_ENABLED AWS_STORAGE_BUCKET: $AWS_STORAGE_BUCKET