diff --git a/.gitignore b/.gitignore
index e220135f64..0092dccc2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ regtests/output/
 # This file, if checked in after running for example regtests, contains unmanaged dependencies that eventually
 # cause unnecessary "security alerts" like https://github.com/apache/polaris/pull/718.
 regtests/client/python/poetry.lock
+regtests/minio/miniodata/*
 
 # Python stuff (see note about poetry.lock above as well!)
 /poetry.lock
@@ -64,6 +65,9 @@ gradle/wrapper/gradle-wrapper-*.sha256
 *.ipr
 *.iws
 
+# VS Code
+.vscode
+
 # Gradle
 /.gradle
 /build-logic/.gradle
diff --git a/build.gradle.kts b/build.gradle.kts
index 45f20b59e9..02c24a4db2 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -109,6 +109,9 @@ tasks.named("rat").configure {
   excludes.add("regtests/metastore_db/**")
   excludes.add("regtests/client/python/.openapi-generator/**")
   excludes.add("regtests/output/**")
+  excludes.add("regtests/minio/miniodata/**")
+  excludes.add("regtests/minio/**/*.crt")
+  excludes.add("regtests/minio/**/*.key")
 
   excludes.add("**/*.ipynb")
   excludes.add("**/*.iml")
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java
index 087d0525ee..a542c3ac2c 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java
@@ -127,6 +127,7 @@ protected FeatureConfiguration(
           .defaultValue(
               List.of(
                   StorageConfigInfo.StorageTypeEnum.S3.name(),
+                  StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE.name(),
                   StorageConfigInfo.StorageTypeEnum.AZURE.name(),
                   StorageConfigInfo.StorageTypeEnum.GCS.name(),
                   StorageConfigInfo.StorageTypeEnum.FILE.name()))
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java
index f3bfd6edf0..82364e2603 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/entity/CatalogEntity.java
@@ -38,12 +38,14 @@
 import org.apache.polaris.core.admin.model.FileStorageConfigInfo;
 import org.apache.polaris.core.admin.model.GcpStorageConfigInfo;
 import org.apache.polaris.core.admin.model.PolarisCatalog;
+import org.apache.polaris.core.admin.model.S3CompatibleStorageConfigInfo;
 import org.apache.polaris.core.admin.model.StorageConfigInfo;
 import org.apache.polaris.core.storage.FileStorageConfigurationInfo;
 import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo;
 import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo;
 import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo;
 import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo;
+import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo;
 
 /**
  * Catalog specific subclass of the {@link PolarisEntity} that handles conversion from the {@link
@@ -141,6 +143,22 @@ private StorageConfigInfo getStorageInfo(Map<String, String> internalProperties)
           .setRegion(awsConfig.getRegion())
           .build();
     }
+    if (configInfo instanceof S3CompatibleStorageConfigurationInfo) {
+      S3CompatibleStorageConfigurationInfo s3Config =
+          (S3CompatibleStorageConfigurationInfo) configInfo;
+      return S3CompatibleStorageConfigInfo.builder()
+          .setStorageType(StorageConfigInfo.StorageTypeEnum.S3_COMPATIBLE)
+          .setS3Endpoint(s3Config.getS3Endpoint())
+          .setS3ProfileName(s3Config.getS3ProfileName())
+          .setS3PathStyleAccess(s3Config.getS3PathStyleAccess())
+          .setAllowedLocations(s3Config.getAllowedLocations())
+          .setS3CredentialsCatalogAccessKeyEnvVar(s3Config.getS3CredentialsCatalogAccessKeyId())
+          .setS3CredentialsCatalogSecretAccessKeyEnvVar(
+              s3Config.getS3CredentialsCatalogSecretAccessKey())
+          .setS3Region(s3Config.getS3Region())
+          .setS3RoleArn(s3Config.getS3RoleArn())
+          .build();
+    }
     if (configInfo instanceof AzureStorageConfigurationInfo) {
       AzureStorageConfigurationInfo azureConfig = (AzureStorageConfigurationInfo) configInfo;
       return AzureStorageConfigInfo.builder()
@@ -250,6 +268,21 @@ public Builder setStorageConfigurationInfo(
         awsConfig.validateArn(awsConfigModel.getRoleArn());
         config = awsConfig;
         break;
+
+      case S3_COMPATIBLE:
+        S3CompatibleStorageConfigInfo s3ConfigModel =
+            (S3CompatibleStorageConfigInfo) storageConfigModel;
+        config =
+            new S3CompatibleStorageConfigurationInfo(
+                s3ConfigModel.getS3Endpoint(),
+                s3ConfigModel.getS3ProfileName(),
+                s3ConfigModel.getS3CredentialsCatalogAccessKeyEnvVar(),
+                s3ConfigModel.getS3CredentialsCatalogSecretAccessKeyEnvVar(),
+                s3ConfigModel.getS3PathStyleAccess(),
+                s3ConfigModel.getS3Region(),
+                s3ConfigModel.getS3RoleArn(),
+                new ArrayList<>(allowedLocations));
+        break;
       case AZURE:
         AzureStorageConfigInfo azureConfigModel = (AzureStorageConfigInfo) storageConfigModel;
         AzureStorageConfigurationInfo azureConfigInfo =
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
index 2f21a84fd7..f9d105b49b 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisCredentialProperty.java
@@ -27,6 +27,9 @@ public enum PolarisCredentialProperty {
       String.class,
       "s3.session-token-expires-at-ms",
       "the time the aws session token expires, in milliseconds"),
+  AWS_ENDPOINT(String.class, "s3.endpoint", "the aws s3 endpoint"),
+  AWS_PATH_STYLE_ACCESS(
+      Boolean.class, "s3.path-style-access", "whether or not to use path-style access"),
   CLIENT_REGION(
       String.class, "client.region", "region to configure client for making requests to AWS"),
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
index 9631d95b42..6d24d4fc78 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/PolarisStorageConfigurationInfo.java
@@ -47,6 +47,7 @@
 import org.apache.polaris.core.storage.aws.AwsStorageConfigurationInfo;
 import org.apache.polaris.core.storage.azure.AzureStorageConfigurationInfo;
 import org.apache.polaris.core.storage.gcp.GcpStorageConfigurationInfo;
+import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -62,6 +63,7 @@
 @JsonTypeInfo(use = JsonTypeInfo.Id.NAME)
 @JsonSubTypes({
   @JsonSubTypes.Type(value = AwsStorageConfigurationInfo.class),
+  @JsonSubTypes.Type(value = S3CompatibleStorageConfigurationInfo.class),
   @JsonSubTypes.Type(value = AzureStorageConfigurationInfo.class),
   @JsonSubTypes.Type(value = GcpStorageConfigurationInfo.class),
   @JsonSubTypes.Type(value = FileStorageConfigurationInfo.class),
@@ -241,6 +243,7 @@ public void validateMaxAllowedLocations(int maxAllowedLocations) {
   /** Polaris' storage type, each has a fixed prefix for its location */
   public enum StorageType {
     S3("s3://"),
+    S3_COMPATIBLE("s3://"),
     AZURE(List.of("abfs://", "wasb://", "abfss://", "wasbs://")),
     GCS("gs://"),
     FILE("file://"),
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java
index 02cc2af126..6eb26a94df 100644
--- a/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/StorageUtil.java
@@ -20,6 +20,11 @@
 import jakarta.annotation.Nonnull;
 import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Stream;
+import software.amazon.awssdk.policybuilder.iam.*;
 
 public class StorageUtil {
   /**
@@ -62,4 +67,136 @@ public class StorageUtil {
   public static @Nonnull String getBucket(URI uri) {
     return uri.getAuthority();
   }
+
+  /**
+   * Given a path, return it without its leading slash
+   *
+   * @param path A path to parse
+   * @return The same path without a leading slash
+   */
+  private static @Nonnull String trimLeadingSlash(String path) {
+    if (path.startsWith("/")) {
+      path = path.substring(1);
+    }
+    return path;
+  }
+
+  /**
+   * Given a URI, format it as an S3 path
+   *
+   * @param uri A URI to parse
+   * @return The bucket and path joined by a slash
+   */
+  private static @Nonnull String parseS3Path(URI uri) {
+    String bucket = getBucket(uri);
+    String path = trimLeadingSlash(uri.getPath());
+    return String.join("/", bucket, path);
+  }
+
+  /**
+   * Given a roleArn, return the S3 ARN prefix for its partition
+   *
+   * @param roleArn A roleArn to parse
+   * @return The ARN prefix matching the roleArn's partition
+   */
+  private static String getArnPrefixFor(String roleArn) {
+    if (roleArn.contains("aws-cn")) {
+      return "arn:aws-cn:s3:::";
+    } else if (roleArn.contains("aws-us-gov")) {
+      return "arn:aws-us-gov:s3:::";
+    } else {
+      return "arn:aws:s3:::";
+    }
+  }
+
+  /**
+   * Generate an IamPolicy from the input readLocations and writeLocations, optionally with list
+   * support. Credentials will be scoped to exactly the resources provided. If read and write
+   * locations are empty, a non-empty policy will be generated that grants GetObject and optionally
+   * ListBucket privileges with no resources. This prevents us from sending an empty policy to AWS
+   * and just assuming the role with full privileges.
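+   *
+   * <p>For example, a single read location {@code s3://bucket/path} yields GetObject and
+   * GetObjectVersion on {@code arn:aws:s3:::bucket/path/*}, GetBucketLocation on {@code
+   * arn:aws:s3:::bucket} and, when {@code allowList} is set, ListBucket on the bucket constrained
+   * to the {@code path/*} prefix.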
+   *
+   * @param roleArn A roleArn
+   * @param allowList Whether to grant list (s3:ListBucket) privileges
+   * @param readLocations A list of input read locations
+   * @param writeLocations A list of input write locations
+   * @return A policy limiting scope access
+   */
+  // TODO - add KMS key access
+  public static IamPolicy policyString(
+      String roleArn, boolean allowList, Set<String> readLocations, Set<String> writeLocations) {
+    IamPolicy.Builder policyBuilder = IamPolicy.builder();
+    IamStatement.Builder allowGetObjectStatementBuilder =
+        IamStatement.builder()
+            .effect(IamEffect.ALLOW)
+            .addAction("s3:GetObject")
+            .addAction("s3:GetObjectVersion");
+    Map<String, IamStatement.Builder> bucketListStatementBuilder = new HashMap<>();
+    Map<String, IamStatement.Builder> bucketGetLocationStatementBuilder = new HashMap<>();
+
+    String arnPrefix = getArnPrefixFor(roleArn);
+    Stream.concat(readLocations.stream(), writeLocations.stream())
+        .distinct()
+        .forEach(
+            location -> {
+              URI uri = URI.create(location);
+              allowGetObjectStatementBuilder.addResource(
+                  // TODO add support for CN and GOV
+                  IamResource.create(
+                      arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/")));
+              final var bucket = arnPrefix + StorageUtil.getBucket(uri);
+              if (allowList) {
+                bucketListStatementBuilder
+                    .computeIfAbsent(
+                        bucket,
+                        (String key) ->
+                            IamStatement.builder()
+                                .effect(IamEffect.ALLOW)
+                                .addAction("s3:ListBucket")
+                                .addResource(key))
+                    .addCondition(
+                        IamConditionOperator.STRING_LIKE,
+                        "s3:prefix",
+                        StorageUtil.concatFilePrefixes(trimLeadingSlash(uri.getPath()), "*", "/"));
+              }
+              bucketGetLocationStatementBuilder.computeIfAbsent(
+                  bucket,
+                  key ->
+                      IamStatement.builder()
+                          .effect(IamEffect.ALLOW)
+                          .addAction("s3:GetBucketLocation")
+                          .addResource(key));
+            });
+
+    if (!writeLocations.isEmpty()) {
+      IamStatement.Builder allowPutObjectStatementBuilder =
+          IamStatement.builder()
+              .effect(IamEffect.ALLOW)
+              .addAction("s3:PutObject")
+              .addAction("s3:DeleteObject");
+      writeLocations.forEach(
+          location -> {
+            URI uri = URI.create(location);
+            // TODO add support for CN and GOV
+            allowPutObjectStatementBuilder.addResource(
+                IamResource.create(
+                    arnPrefix + StorageUtil.concatFilePrefixes(parseS3Path(uri), "*", "/")));
+          });
+      policyBuilder.addStatement(allowPutObjectStatementBuilder.build());
+    }
+    if (!bucketListStatementBuilder.isEmpty()) {
+      bucketListStatementBuilder
+          .values()
+          .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build()));
+    } else if (allowList) {
+      // add list privilege with 0 resources
+      policyBuilder.addStatement(
+          IamStatement.builder().effect(IamEffect.ALLOW).addAction("s3:ListBucket").build());
+    }
+
+    bucketGetLocationStatementBuilder
+        .values()
+        .forEach(statementBuilder -> policyBuilder.addStatement(statementBuilder.build()));
+    return policyBuilder.addStatement(allowGetObjectStatementBuilder.build()).build();
+  }
 }
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java
new file mode 100644
index 0000000000..674c735fd4
--- /dev/null
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleCredentialsStorageIntegration.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.core.storage.s3compatible;
+
+import static org.apache.polaris.core.config.FeatureConfiguration.STORAGE_CREDENTIAL_DURATION_SECONDS;
+import static org.apache.polaris.core.config.PolarisConfiguration.loadConfig;
+import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_KEY_ID;
+import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_SECRET_KEY;
+import static org.apache.polaris.core.storage.PolarisCredentialProperty.AWS_TOKEN;
+
+import jakarta.annotation.Nonnull;
+import jakarta.ws.rs.NotAuthorizedException;
+import java.net.URI;
+import java.util.EnumMap;
+import java.util.Set;
+import org.apache.polaris.core.PolarisDiagnostics;
+import org.apache.polaris.core.storage.InMemoryStorageIntegration;
+import org.apache.polaris.core.storage.PolarisCredentialProperty;
+import org.apache.polaris.core.storage.StorageUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
+import software.amazon.awssdk.profiles.ProfileFileSupplier;
+import software.amazon.awssdk.services.sts.StsClient;
+import software.amazon.awssdk.services.sts.StsClientBuilder;
+import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
+import software.amazon.awssdk.services.sts.model.AssumeRoleResponse;
+
+/** S3 compatible implementation of PolarisStorageIntegration */
+public class S3CompatibleCredentialsStorageIntegration
+    extends InMemoryStorageIntegration<S3CompatibleStorageConfigurationInfo> {
+
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(S3CompatibleCredentialsStorageIntegration.class);
+
+  public S3CompatibleCredentialsStorageIntegration() {
+    super(S3CompatibleCredentialsStorageIntegration.class.getName());
+  }
+
+  @Override
+  public EnumMap<PolarisCredentialProperty, String> getSubscopedCreds(
+      @Nonnull PolarisDiagnostics diagnostics,
+      @Nonnull S3CompatibleStorageConfigurationInfo storageConfig,
+      boolean allowListOperation,
+      @Nonnull Set<String> allowedReadLocations,
+      @Nonnull Set<String> allowedWriteLocations) {
+
+    String caI = System.getenv(storageConfig.getS3CredentialsCatalogAccessKeyId());
+    String caS = System.getenv(storageConfig.getS3CredentialsCatalogSecretAccessKey());
+
+    EnumMap<PolarisCredentialProperty, String> propertiesMap =
+        new EnumMap<>(PolarisCredentialProperty.class);
+    propertiesMap.put(PolarisCredentialProperty.AWS_ENDPOINT, storageConfig.getS3Endpoint());
+    propertiesMap.put(
+        PolarisCredentialProperty.AWS_PATH_STYLE_ACCESS,
+        storageConfig.getS3PathStyleAccess().toString());
+    if (storageConfig.getS3Region() != null) {
+      propertiesMap.put(PolarisCredentialProperty.CLIENT_REGION, storageConfig.getS3Region());
+    }
+
+    LOGGER.debug("S3Compatible - createStsClient()");
+    StsClientBuilder stsBuilder = StsClient.builder();
+    stsBuilder.endpointOverride(URI.create(storageConfig.getS3Endpoint()));
+    if (storageConfig.getS3ProfileName() != null) {
+      stsBuilder.credentialsProvider(
+          ProfileCredentialsProvider.builder()
+              .profileFile(ProfileFileSupplier.defaultSupplier())
+              .profileName(storageConfig.getS3ProfileName())
+              .build());
+      LOGGER.debug("S3Compatible - stsClient using profile from catalog settings");
+    } else if (caI != null && caS != null) {
+      stsBuilder.credentialsProvider(
+          StaticCredentialsProvider.create(AwsBasicCredentials.create(caI, caS)));
+      LOGGER.debug("S3Compatible - stsClient using keys from catalog settings");
+    }
+    try (StsClient stsClient = stsBuilder.build()) {
+      LOGGER.debug("S3Compatible - stsClient successfully built");
+      AssumeRoleResponse response =
+          stsClient.assumeRole(
+              AssumeRoleRequest.builder()
+                  .roleSessionName("PolarisCredentialsSTS")
+                  .roleArn(storageConfig.getS3RoleArn())
+                  .policy(
+                      StorageUtil.policyString(
+                              storageConfig.getS3RoleArn(),
+                              allowListOperation,
+                              allowedReadLocations,
+                              allowedWriteLocations)
+                          .toJson())
+                  .durationSeconds(loadConfig(STORAGE_CREDENTIAL_DURATION_SECONDS))
+                  .build());
+
+      propertiesMap.put(AWS_KEY_ID, response.credentials().accessKeyId());
+      propertiesMap.put(AWS_SECRET_KEY, response.credentials().secretAccessKey());
+      propertiesMap.put(AWS_TOKEN, response.credentials().sessionToken());
+      LOGGER.debug(
+          "S3Compatible - assumeRole - Obtained token expiration : {}",
+          response.credentials().expiration().toString());
+    } catch (Exception e) {
+      throw new NotAuthorizedException("Unable to build S3 Security Token Service client", e);
+    }
+
+    return propertiesMap;
+  }
+}
diff --git a/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java
new file mode 100644
index 0000000000..e4e4cfd048
--- /dev/null
+++ b/polaris-core/src/main/java/org/apache/polaris/core/storage/s3compatible/S3CompatibleStorageConfigurationInfo.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.core.storage.s3compatible;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.MoreObjects;
+import jakarta.annotation.Nonnull;
+import jakarta.annotation.Nullable;
+import java.util.List;
+import org.apache.polaris.core.storage.PolarisStorageConfigurationInfo;
+
+/**
+ * S3-Compatible Storage Configuration. This class holds the parameters needed to connect to
+ * S3-compatible storage services such as MinIO, Ceph, Dell ECS, etc.
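+ *
+ * <p>Catalog credentials are resolved either from the AWS profile named by {@code s3ProfileName},
+ * or from the environment variables whose names are stored in {@code
+ * s3CredentialsCatalogAccessKeyId} and {@code s3CredentialsCatalogSecretAccessKey}; when {@code
+ * s3RoleArn} is set, they are exchanged for subscoped credentials through the endpoint's STS
+ * service.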
+ */
+public class S3CompatibleStorageConfigurationInfo extends PolarisStorageConfigurationInfo {
+
+  private final @Nonnull String s3Endpoint;
+  private final @Nullable String s3ProfileName;
+  private final @Nullable String s3CredentialsCatalogAccessKeyId;
+  private final @Nullable String s3CredentialsCatalogSecretAccessKey;
+  private final @Nonnull Boolean s3PathStyleAccess;
+  private final @Nullable String s3Region;
+  private final @Nullable String s3RoleArn;
+
+  @JsonCreator
+  public S3CompatibleStorageConfigurationInfo(
+      @JsonProperty(value = "s3Endpoint", required = true) @Nonnull String s3Endpoint,
+      @JsonProperty(value = "s3ProfileName", required = false) @Nullable String s3ProfileName,
+      @JsonProperty(value = "s3CredentialsCatalogAccessKeyId", required = false) @Nullable
+          String s3CredentialsCatalogAccessKeyId,
+      @JsonProperty(value = "s3CredentialsCatalogSecretAccessKey", required = false) @Nullable
+          String s3CredentialsCatalogSecretAccessKey,
+      @JsonProperty(value = "s3PathStyleAccess", required = false, defaultValue = "false") @Nonnull
+          Boolean s3PathStyleAccess,
+      @JsonProperty(value = "s3Region", required = false) @Nullable String s3Region,
+      @JsonProperty(value = "s3RoleArn", required = false) @Nullable String s3RoleArn,
+      @JsonProperty(value = "allowedLocations", required = true) @Nonnull
+          List<String> allowedLocations) {
+
+    super(StorageType.S3_COMPATIBLE, allowedLocations);
+    this.s3PathStyleAccess = s3PathStyleAccess;
+    this.s3Endpoint = s3Endpoint;
+    this.s3ProfileName = s3ProfileName;
+    this.s3CredentialsCatalogAccessKeyId =
+        (s3CredentialsCatalogAccessKeyId == null) ? "" : s3CredentialsCatalogAccessKeyId;
+    this.s3CredentialsCatalogSecretAccessKey =
+        (s3CredentialsCatalogSecretAccessKey == null) ? "" : s3CredentialsCatalogSecretAccessKey;
+    this.s3Region = s3Region;
+    this.s3RoleArn = (s3RoleArn == null) ? "" : s3RoleArn;
"" : s3RoleArn; + } + + public @Nonnull String getS3Endpoint() { + return this.s3Endpoint; + } + + public @Nullable String getS3ProfileName() { + return this.s3ProfileName; + } + + public @Nonnull Boolean getS3PathStyleAccess() { + return this.s3PathStyleAccess; + } + + public @Nullable String getS3CredentialsCatalogAccessKeyId() { + return this.s3CredentialsCatalogAccessKeyId; + } + + public @Nullable String getS3CredentialsCatalogSecretAccessKey() { + return this.s3CredentialsCatalogSecretAccessKey; + } + + public @Nullable String getS3RoleArn() { + return this.s3RoleArn; + } + + public @Nullable String getS3Region() { + return this.s3Region; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("storageType", getStorageType().name()) + .add("allowedLocation", getAllowedLocations()) + .add("s3Region", getS3Region()) + .add("s3RoleArn", getS3RoleArn()) + .add("s3PathStyleAccess", getS3PathStyleAccess()) + .add("s3Endpoint", getS3Endpoint()) + .add("s3ProfileName", getS3ProfileName()) + .toString(); + } + + @Override + public String getFileIoImplClassName() { + return "org.apache.iceberg.aws.s3.S3FileIO"; + } +} diff --git a/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java new file mode 100644 index 0000000000..6e1331172b --- /dev/null +++ b/polaris-core/src/test/java/org/apache/polaris/service/storage/s3compatible/S3CompatibleStorageConfigurationInfoTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.service.storage.s3compatible;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.List;
+import org.apache.polaris.core.storage.s3compatible.S3CompatibleStorageConfigurationInfo;
+import org.junit.jupiter.api.Test;
+
+public class S3CompatibleStorageConfigurationInfoTest {
+
+  @Test
+  public void testS3CompatibleStorageConfigurationInfo() {
+    String warehouseDir = "s3://bucket/path/to/warehouse";
+    S3CompatibleStorageConfigurationInfo conf =
+        new S3CompatibleStorageConfigurationInfo(
+            "http://localhost:9000",
+            null,
+            "MINIO_S3_CATALOG_1_ID",
+            "MINIO_S3_CATALOG_1_SECRET",
+            true,
+            null,
+            null,
+            List.of(warehouseDir));
+    assertThat(conf).isNotNull();
+    assertThat(conf.getS3Endpoint()).isEqualTo("http://localhost:9000");
+    assertThat(conf.getS3ProfileName()).isNull();
+    assertThat(conf.getS3CredentialsCatalogAccessKeyId()).isEqualTo("MINIO_S3_CATALOG_1_ID");
+    assertThat(conf.getS3CredentialsCatalogSecretAccessKey())
+        .isEqualTo("MINIO_S3_CATALOG_1_SECRET");
+    assertThat(conf.getS3PathStyleAccess()).isTrue();
+    assertThat(conf.getS3Region()).isNull();
+    assertThat(conf.getS3RoleArn()).isEqualTo("");
+  }
+}
diff --git a/quarkus/defaults/src/main/resources/application-it.properties b/quarkus/defaults/src/main/resources/application-it.properties
index 5e110071de..014cfdc46e 100644
--- a/quarkus/defaults/src/main/resources/application-it.properties
+++ b/quarkus/defaults/src/main/resources/application-it.properties
@@ -37,7 +37,7 @@ polaris.features.defaults."ALLOW_WILDCARD_LOCATION"=true
 polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=true
 polaris.features.defaults."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_it"=true
 polaris.features.defaults."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true
-polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","GCS","AZURE"]
+polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["FILE","S3","S3_COMPATIBLE","GCS","AZURE"]
 
 polaris.realm-context.realms=POLARIS,OTHER
diff --git a/quarkus/defaults/src/main/resources/application.properties b/quarkus/defaults/src/main/resources/application.properties
index fcfbc676ff..fa0a811ec0 100644
--- a/quarkus/defaults/src/main/resources/application.properties
+++ b/quarkus/defaults/src/main/resources/application.properties
@@ -90,7 +90,7 @@ polaris.realm-context.header-name=Polaris-Realm
 polaris.realm-context.require-header=false
 
 polaris.features.defaults."ENFORCE_PRINCIPAL_CREDENTIAL_ROTATION_REQUIRED_CHECKING"=false
-polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","GCS","AZURE","FILE"]
+polaris.features.defaults."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","S3_COMPATIBLE","GCS","AZURE","FILE"]
 
 # realm overrides
 # polaris.features.realm-overrides."my-realm"."INITIALIZE_DEFAULT_CATALOG_FILEIO_FOR_TEST"=true
 # polaris.features.realm-overrides."my-realm"."SKIP_CREDENTIAL_SUBSCOPING_INDIRECTION"=true
diff --git a/regtests/README.md b/regtests/README.md
index 1fb1bbcdb5..aba96be6a2 100644
--- a/regtests/README.md
+++ b/regtests/README.md
@@ -40,7 +40,7 @@ follows:
 
 ```shell
 ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache
-docker compose -f ./regtests/docker-compose.yml up --build --exit-code-from regtest
+docker-compose -f ./regtests/docker-compose.yml up --build --exit-code-from regtest
 ```
 
 In this setup, a Polaris container will be started in a docker-compose group, using the image
@@ -205,4 +205,4 @@ and download all of the test
 dependencies into it. From here, `run.sh` will be a
 To debug, setup IntelliJ to point at your virtual environment to find your test dependencies
 (see https://www.jetbrains.com/help/idea/configuring-python-sdk.html). Then run the test in your IDE.
-The above is handled automatically when running reg tests from the docker image.
\ No newline at end of file
+The above is handled automatically when running reg tests from the docker image.
diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml
index 94a0f7502b..16572db988 100644
--- a/regtests/docker-compose.yml
+++ b/regtests/docker-compose.yml
@@ -31,6 +31,8 @@ services:
       AZURE_TENANT_ID: $AZURE_TENANT_ID
       AZURE_CLIENT_ID: $AZURE_CLIENT_ID
       AZURE_CLIENT_SECRET: $AZURE_CLIENT_SECRET
+      MINIO_S3_CATALOG_1_ID: minio-user-catalog
+      MINIO_S3_CATALOG_1_SECRET: 12345678-minio-catalog
       POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,secret
       quarkus.log.file.enable: "false"
       quarkus.otel.sdk.disabled: "true"
@@ -49,6 +51,8 @@ services:
     depends_on:
       polaris:
         condition: service_healthy
+      minio-configured-without-tls:
+        condition: service_started
     environment:
       AWS_TEST_ENABLED: $AWS_TEST_ENABLED
       AWS_STORAGE_BUCKET: $AWS_STORAGE_BUCKET
@@ -70,3 +74,33 @@ services:
     volumes:
       - ./output:/tmp/polaris-regtests/
       - ./credentials:/tmp/credentials/
+  minio-without-tls:
+    image: minio/minio:latest
+    container_name: minio-without-tls
+    environment:
+      - MINIO_ROOT_USER=admin
+      - MINIO_ROOT_PASSWORD=password
+      - MINIO_DOMAIN=minio
+    ports:
+      - 9000:9000
+    volumes:
+      - ./minio/miniodata:/data
+    command: ["server", "/data"]
+  minio-configured-without-tls:
+    depends_on:
+      - minio-without-tls
+    image: minio/mc:latest
+    container_name: minio-configured-without-tls
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    entrypoint: >
+      /bin/sh -c "
+      until (/usr/bin/mc config host add minio http://minio-without-tls:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      /usr/bin/mc rm -r --force --quiet minio/warehouse;
+      /usr/bin/mc mb --ignore-existing minio/warehouse;
+      /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog;
+      /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog;
+      tail -f /dev/null
+      "
diff --git a/regtests/minio/Readme.md b/regtests/minio/Readme.md
new file mode 100644
index 0000000000..afa54e0b2a
--- /dev/null
+++ b/regtests/minio/Readme.md
@@ -0,0 +1,41 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# MinIO Secured
+## MinIO and secured buckets with TLS (self-signed / custom Certificate Authority)
+
+This setup lets you test Polaris against TLS buckets that use a custom Certificate Authority or a self-signed certificate.
+
+## Generate MinIO self-signed certificates for the docker-compose setup
+
+- Download the MinIO certificate generator: https://github.com/minio/certgen
+- Generate certificates: ```./certgen -host "localhost,minio,*"```
+- Put them in ./certs and ./certs/CAs (see the resulting layout below). They will be mounted at the default MinIO certificate location in the container.
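+
+After generation, the layout under `regtests/minio/` should be as follows (the same self-signed pair also serves as its own CA here, matching the files added in this change):
+
+```
+certs/
+├── private.key
+├── public.crt
+└── CAs/
+    ├── private.key
+    └── public.crt
+```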
+
+## Test MinIO secured TLS buckets with a self-signed certificate using the AWS CLI
+- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --no-verify-ssl```
+- ```aws s3 ls s3:// --recursive --endpoint-url=https://localhost:9000 --ca-bundle=./certs/public.crt```
+
+## Add only the public.crt to the Java cacerts as a Certificate Authority
+- ```sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./certs/public.crt```
+- ```keytool -list -cacerts -alias minio -storepass changeit```
+
+## Remove the public.crt from the Java cacerts
+- ```sudo keytool -delete -trustcacerts -cacerts -storepass changeit -noprompt -alias minio```
+- ```keytool -list -cacerts -alias minio -storepass changeit```
diff --git a/regtests/minio/certs/CAs/private.key b/regtests/minio/certs/CAs/private.key
new file mode 100644
index 0000000000..e2e7ffca0c
--- /dev/null
+++ b/regtests/minio/certs/CAs/private.key
@@ -0,0 +1,5 @@
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw
+pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR
+cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m
+-----END PRIVATE KEY-----
diff --git a/regtests/minio/certs/CAs/public.crt b/regtests/minio/certs/CAs/public.crt
new file mode 100644
index 0000000000..b06cc51e5d
--- /dev/null
+++ b/regtests/minio/certs/CAs/public.crt
@@ -0,0 +1,13 @@
+-----BEGIN CERTIFICATE-----
+MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww
+GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k
+ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT
+Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG
+ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v
+NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud
+DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/
+MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh
+bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6
+BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa
+saXcmTbe
+-----END CERTIFICATE-----
diff --git a/regtests/minio/certs/private.key b/regtests/minio/certs/private.key
new file mode 100644
index 0000000000..e2e7ffca0c
--- /dev/null
+++ b/regtests/minio/certs/private.key
@@ -0,0 +1,5 @@
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgqt8snxuGN+69o5tw
+pHvoLV9e7GMIqYfGdA8L0k7+yV+hRANCAAS9oQlQk2nk4UxFreVLDlXvBplQLzvR
+cm9fLzYDXQ6SXb7RWusrIJ0mJU6b/u4xQOcW5IB3ADj1SQ4N9SrjOX2m
+-----END PRIVATE KEY-----
diff --git a/regtests/minio/certs/public.crt b/regtests/minio/certs/public.crt
new file mode 100644
index 0000000000..b06cc51e5d
--- /dev/null
+++ b/regtests/minio/certs/public.crt
@@ -0,0 +1,13 @@
+-----BEGIN CERTIFICATE-----
+MIIB4jCCAYegAwIBAgIQElGrcf0kjaLwbaan1e8WZTAKBggqhkjOPQQDAjA2MRww
+GgYDVQQKExNDZXJ0Z2VuIERldmVsb3BtZW50MRYwFAYDVQQLDA1maWRAcGVyc29k
+ZWxsMB4XDTI0MTAxNTIxNDQxOVoXDTI1MTAxNTIxNDQxOVowNjEcMBoGA1UEChMT
+Q2VydGdlbiBEZXZlbG9wbWVudDEWMBQGA1UECwwNZmlkQHBlcnNvZGVsbDBZMBMG
+ByqGSM49AgEGCCqGSM49AwEHA0IABL2hCVCTaeThTEWt5UsOVe8GmVAvO9Fyb18v
+NgNdDpJdvtFa6ysgnSYlTpv+7jFA5xbkgHcAOPVJDg31KuM5faajdzB1MA4GA1Ud
+DwEB/wQEAwICpDATBgNVHSUEDDAKBggrBgEFBQcDATAPBgNVHRMBAf8EBTADAQH/
+MB0GA1UdDgQWBBTb6lIhkV1RLhfKNPrcdGEkxsvkrjAeBgNVHREEFzAVgglsb2Nh
+bGhvc3SCBW1pbmlvggEqMAoGCCqGSM49BAMCA0kAMEYCIQDLm8+CZvB+7gRpCRr6
+BCAJBF8A3e6Pv7G1oCS1uwiUhQIhAI3Z/aBYatMkbb4VmQH1VZC8CvUyNPHS5sTa
+saXcmTbe
+-----END CERTIFICATE-----
diff --git a/regtests/minio/docker-compose.yml b/regtests/minio/docker-compose.yml
new file mode 100644
index 0000000000..0c528d94c6
--- /dev/null
+++ b/regtests/minio/docker-compose.yml
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+services:
+  polaris-minio:
+    image: minio/minio:latest
+    container_name: minio
+    environment:
+      - MINIO_ROOT_USER=admin
+      - MINIO_ROOT_PASSWORD=password
+      - MINIO_DOMAIN=minio
+    networks:
+      minio_net:
+        aliases:
+          - warehouse.minio
+          - warehouse2.minio
+    ports:
+      - 9001:9001
+      - 9000:9000
+    volumes:
+      - ./miniodata:/data
+      - ./certs:/root/.minio/certs/
+    command: ["server", "/data", "--console-address", ":9001"]
+  minio-configured:
+    depends_on:
+      - polaris-minio
+    image: minio/mc:latest
+    container_name: mc
+    networks:
+      minio_net:
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    volumes:
+      - ./certs:/root/.mc/certs
+    entrypoint: >
+      /bin/sh -c "
+      until (/usr/bin/mc config host add minio https://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      /usr/bin/mc rm -r --force --quiet minio/warehouse;
+      /usr/bin/mc mb --ignore-existing minio/warehouse;
+      /usr/bin/mc rm -r --force --quiet minio/warehouse2;
+      /usr/bin/mc mb --ignore-existing minio/warehouse2;
+      /usr/bin/mc admin user add minio minio-user-catalog 12345678-minio-catalog;
+      /usr/bin/mc admin policy attach minio readwrite --user minio-user-catalog;
+      tail -f /dev/null
+      "
+networks:
+  minio_net:
diff --git a/regtests/minio/miniodata/Readme.md b/regtests/minio/miniodata/Readme.md
new file mode 100644
index 0000000000..d65c6f4723
--- /dev/null
+++ b/regtests/minio/miniodata/Readme.md
@@ -0,0 +1 @@
+# Folder for MinIO data container volume
diff --git a/regtests/minio/queries-for-spark.sql b/regtests/minio/queries-for-spark.sql
new file mode 100644
index 0000000000..0932af1ee7
--- /dev/null
+++ b/regtests/minio/queries-for-spark.sql
@@ -0,0 +1,38 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+
+-- http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
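+
+-- These statements exercise namespace, table, and view round-trips against the MinIO-backed
+-- catalog; run_spark_sql_s3compatibleTLS.sh feeds this file to spark-sql.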
+
+CREATE DATABASE IF NOT EXISTS db1;
+CREATE DATABASE IF NOT EXISTS db1.ns1;
+CREATE DATABASE IF NOT EXISTS db1.ns2;
+CREATE OR REPLACE TABLE db1.ns1.table1 ( f1 int, f2 int );
+INSERT INTO db1.ns1.table1 VALUES (10, 20);
+INSERT INTO db1.ns1.table1 VALUES (11, 21);
+INSERT INTO db1.ns1.table1 VALUES (12, 22);
+SELECT * FROM db1.ns1.table1;
+
+CREATE OR REPLACE VIEW db1.ns2.view1 ( line_count COMMENT 'Count of lines') AS SELECT COUNT(1) as qty FROM db1.ns1.table1;
+SELECT * FROM db1.ns2.view1;
+INSERT INTO db1.ns1.table1 VALUES (13, 23);
+SELECT * FROM db1.ns2.view1;
+
+-- Test the second bucket allowed in the catalog
+CREATE DATABASE IF NOT EXISTS wh2 LOCATION 's3://warehouse2/polaris';
+CREATE OR REPLACE TABLE wh2.table1 ( f1 int, f2 int );
+INSERT INTO wh2.table1 VALUES (01, 02);
+SELECT * FROM wh2.table1;
+
+quit;
diff --git a/regtests/run_spark_sql_s3compatibleTLS.sh b/regtests/run_spark_sql_s3compatibleTLS.sh
new file mode 100755
index 0000000000..3f2dddd4e6
--- /dev/null
+++ b/regtests/run_spark_sql_s3compatibleTLS.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# -----------------------------------------------------------------------------
+# Purpose: Launch the Spark SQL shell to interact with Polaris and run non-regression tests (NRT).
+# -----------------------------------------------------------------------------
+#
+# Prerequisite:
+# This script uses MinIO with TLS.
+# Please follow the instructions in regtests/minio/Readme.md and update your
+# Java cacerts with the self-signed certificate.
+#
+# Usage:
+#   ./run_spark_sql_s3compatibleTLS.sh [S3-location]
+#
+# Description:
+# - Without arguments: runs against the default MinIO bucket s3://warehouse/polaris
+# - With one argument: runs against a catalog backed by MinIO S3.
+#   - [S3-location] - The S3 path to use as the default base location for the catalog.
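+#
+# The cacerts import from regtests/minio/Readme.md, run from the regtests directory, is:
+#   sudo keytool -import -trustcacerts -cacerts -storepass changeit -noprompt -alias minio -file ./minio/certs/public.crt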
+#
+# Examples:
+# - Run against an S3_COMPATIBLE MinIO bucket:
+#   ./run_spark_sql_s3compatibleTLS.sh s3://warehouse/polaris
+
+
+clear
+
+if [ $# -ne 0 ] && [ $# -ne 1 ]; then
+  echo "run_spark_sql_s3compatibleTLS.sh accepts 0 or 1 argument; the argument is the bucket location, which defaults to s3://warehouse/polaris"
+  echo "Usage: ./run_spark_sql_s3compatibleTLS.sh [S3-location]"
+  exit 1
+fi
+
+# Init
+REGTEST_HOME=$(dirname $(realpath $0))
+cd ${REGTEST_HOME}
+
+
+if [ $# -eq 0 ]; then
+  echo "creating a catalog backed by S3, default bucket is s3://warehouse/polaris"
+  S3_LOCATION="s3://warehouse/polaris"
+fi
+
+if [ $# -eq 1 ]; then
+  echo "creating a catalog backed by S3 from the first argument of this script, which must match the pattern 's3://mybucket/path'"
+  S3_LOCATION=$1
+fi
+# Second location for testing catalog update
+S3_LOCATION_2="s3://warehouse2/polaris/"
+
+# If Polaris runs in in-memory classic mode, the principal credentials are root:secret
+# If Polaris runs in in-memory DEBUG mode, retrieve the principal credentials from the service log; they follow this pattern: 522f251cc2b9c121:6eff0915385979684d575fa1d3f18e2b
+
+# SPARK_BEARER_TOKEN
+if ! output=$(curl -s -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \
+  -d "grant_type=client_credentials" \
+  -d "client_id=root" \
+  -d "client_secret=secret" \
+  -d "scope=PRINCIPAL_ROLE:ALL"); then
+  echo "Error: Failed to retrieve bearer token"
+  exit 1
+fi
+SPARK_BEARER_TOKEN=$(echo "$output" | awk -F\" '{print $4}')
+if [ "$SPARK_BEARER_TOKEN" == "unauthorized_client" ]; then
+  echo "Error: Failed to retrieve bearer token"
+  exit 1
+fi
+
+# check if Polaris is running
+polaris_http_code=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs)
+if [ "$polaris_http_code" -ne 200 ]; then
+  echo "Polaris is not running on ${POLARIS_HOST:-localhost}:8181. End of script"
+  exit 1
+fi
+
+
+# check if cacerts contains the MinIO certificate
+cert_response=$(keytool -list -cacerts -alias minio -storepass changeit | grep trustedCertEntry)
+echo $cert_response
+if [ -z "$cert_response" ]; then
+  echo "There is no MinIO certificate in your cacerts, please read regtests/minio/Readme.md"
+  echo "End of script :-("
+  exit 1
+fi
+
+# start minio with buckets and users
+echo -e "\n\n-------\n\n"
+echo "Start MinIO with secured self-signed buckets (s3://warehouse) and users, wait a moment please..."
+docker-compose --progress tty --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml up -d minio-configured
+
+echo "The MinIO browser is available during this test at https://localhost:9001 (admin/password); please accept the self-signed certificate"
+echo -e "\n\n-------\n\n"
+
+# spark setup
+echo "Doing spark setup... wait a moment"
wait a moment" +export SPARK_VERSION=spark-3.5.4 +export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3 +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding +./setup.sh > /dev/null 2>&1 +if [ -z "${SPARK_HOME}" ]; then + export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi + + +echo """ +These environment variables have to be available to Polaris service or as keys in the aws profile, and the name of this profile provided to the catalog as parameter : +export CATALOG_S3_KEY_ID=minio-user-catalog +export CATALOG_S3_KEY_SECRET=12345678-minio-catalog +""" +echo Add minio-catalog-1 section in aws profile +cat >>~/.aws/credentials < /dev/stderr + + +echo -e "\n\n---- Assign the catalog_admin to the service_admin.\n" +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \ + -d '{"name": "catalog_admin"}' > /dev/stderr + + +echo -e "\n\n---- Start Spark-sql to test Polaris catalog with queries\n" +${SPARK_HOME}/bin/spark-sql --verbose \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \ + --conf spark.sql.catalog.polaris.warehouse=manual_spark \ + --conf spark.sql.defaultCatalog=polaris \ + --conf spark.hadoop.hive.cli.print.header=true \ + -f "minio/queries-for-spark.sql" + + +echo Remove minio-catalog-1 section from aws profile +sed -i '/\[minio-catalog-1\]/,${/\[minio-catalog-1\]/d; d}' ~/.aws/credentials +echo Done. + +echo +echo End of tests, a table and a view data with displayed should be visible in log above +echo Minio stopping, bucket browser will be shutdown, volume data of the bucket remains in 'regtests/minio/miniodata' +echo ':-)' + +docker-compose --project-name polaris-minio --project-directory minio/ -f minio/docker-compose.yml down \ No newline at end of file diff --git a/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref b/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref new file mode 100755 index 0000000000..d1a453e299 --- /dev/null +++ b/regtests/t_spark_sql/ref/spark_sql_minio.sh.ref @@ -0,0 +1,36 @@ +S3COMPATIBLE Starting test. 
+{"defaults":{"default-base-location":"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog"},"overrides":{"prefix":"spark_sql_s3compatible_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","POST /v1/{prefix}/transactions/commit"]}
+Catalog created
+spark-sql (default)> use polaris;
+spark-sql ()> show namespaces;
+spark-sql ()> create namespace db1;
+spark-sql ()> create namespace db2;
+spark-sql ()> show namespaces;
+db1
+db2
+spark-sql ()>
+         > create namespace db1.schema1;
+spark-sql ()> show namespaces;
+db1
+db2
+spark-sql ()> show namespaces in db1;
+db1.schema1
+spark-sql ()>
+         > create table db1.schema1.tbl1 (col1 int);
+spark-sql ()> show tables in db1;
+spark-sql ()> use db1.schema1;
+spark-sql (db1.schema1)>
+         > insert into tbl1 values (123), (234);
+spark-sql (db1.schema1)> select * from tbl1;
+123
+234
+spark-sql (db1.schema1)>
+         > drop table tbl1 purge;
+spark-sql (db1.schema1)> show tables;
+spark-sql (db1.schema1)> drop namespace db1.schema1;
+spark-sql (db1.schema1)> drop namespace db1;
+spark-sql (db1.schema1)> show namespaces;
+db2
+spark-sql (db1.schema1)> drop namespace db2;
+spark-sql (db1.schema1)> show namespaces;
+spark-sql (db1.schema1)>
diff --git a/regtests/t_spark_sql/src/spark_sql_minio.sh b/regtests/t_spark_sql/src/spark_sql_minio.sh
new file mode 100755
index 0000000000..b1c3cf6911
--- /dev/null
+++ b/regtests/t_spark_sql/src/spark_sql_minio.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#if [ -z "$S3COMPATIBLE_TEST_ENABLED" ] || [ "$S3COMPATIBLE_TEST_ENABLED" != "true" ]; then
+#  echo "S3COMPATIBLE_TEST_ENABLED is not set to 'true'. Skipping test."
+#  exit 0
+#fi
+echo "S3COMPATIBLE Starting test. (shell script named minio so it is not caught by the AWS test pattern matching on the word s3)"
+
+SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"
+
+curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+  -d "{\"name\": \"spark_sql_s3compatible_catalog\", \"id\": 100, \"type\": \"INTERNAL\", \"readOnly\": false, \"properties\": {\"default-base-location\": \"s3://warehouse/polaris_test/spark_sql_s3compatible_catalog\"}, \"storageConfigInfo\": {\"storageType\": \"S3_COMPATIBLE\", \"allowedLocations\": [\"s3://warehouse/polaris_test/\"], \"s3.endpoint\": \"http://minio-without-tls:9000\", \"s3.pathStyleAccess\": true, \"s3.credentials.catalog.accessKeyEnvVar\": \"MINIO_S3_CATALOG_1_ID\", \"s3.credentials.catalog.secretAccessKeyEnvVar\": \"MINIO_S3_CATALOG_1_SECRET\" }}" > /dev/stderr
+
+
+# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata
+curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/spark_sql_s3compatible_catalog/catalog-roles/catalog_admin/grants \
+  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr
+
+# For now, also explicitly assign the catalog_admin to the service_admin. Remove once GS fully rolled out for auto-assign.
+curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/spark_sql_s3compatible_catalog \
+  -d '{"name": "catalog_admin"}' > /dev/stderr
+
+curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=spark_sql_s3compatible_catalog"
+echo
+echo "Catalog created"
+cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=spark_sql_s3compatible_catalog
+use polaris;
+show namespaces;
+create namespace db1;
+create namespace db2;
+show namespaces;
+
+create namespace db1.schema1;
+show namespaces;
+show namespaces in db1;
+
+create table db1.schema1.tbl1 (col1 int);
+show tables in db1;
+use db1.schema1;
+
+insert into tbl1 values (123), (234);
+select * from tbl1;
+
+drop table tbl1 purge;
+show tables;
+drop namespace db1.schema1;
+drop namespace db1;
+show namespaces;
+drop namespace db2;
+show namespaces;
+EOF
+
+curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
+  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/spark_sql_s3compatible_catalog > /dev/stderr
diff --git a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java
index f61c67620f..3f7247b483 100644
--- a/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java
+++ b/service/common/src/main/java/org/apache/polaris/service/storage/PolarisStorageIntegrationProviderImpl.java
@@ -39,6 +39,7 @@
 import org.apache.polaris.core.storage.aws.AwsCredentialsStorageIntegration;
 import org.apache.polaris.core.storage.azure.AzureCredentialsStorageIntegration;
 import org.apache.polaris.core.storage.gcp.GcpCredentialsStorageIntegration;
+import org.apache.polaris.core.storage.s3compatible.S3CompatibleCredentialsStorageIntegration;
 import software.amazon.awssdk.services.sts.StsClient;
 
 @ApplicationScoped
@@ -73,6 +74,10 @@ public PolarisStorageIntegrationProviderImpl(
             (PolarisStorageIntegration<T>)
                 new AwsCredentialsStorageIntegration(stsClientSupplier.get());
         break;
+      case S3_COMPATIBLE:
+        storageIntegration =
+            (PolarisStorageIntegration<T>) new S3CompatibleCredentialsStorageIntegration();
+        break;
       case GCS:
         storageIntegration =
             (PolarisStorageIntegration<T>)
diff --git a/spec/polaris-management-service.yml b/spec/polaris-management-service.yml
index a58f5a69b6..459f3c7744 100644
--- a/spec/polaris-management-service.yml
+++ b/spec/polaris-management-service.yml
@@ -862,6 +862,7 @@ components:
         type: string
         enum:
           - S3
+          - S3_COMPATIBLE
          - GCS
          - AZURE
          - FILE
@@ -877,6 +878,7 @@ components:
        propertyName: storageType
        mapping:
          S3: "#/components/schemas/AwsStorageConfigInfo"
+          S3_COMPATIBLE: "#/components/schemas/S3CompatibleStorageConfigInfo"
          AZURE: "#/components/schemas/AzureStorageConfigInfo"
          GCS: "#/components/schemas/GcpStorageConfigInfo"
          FILE: "#/components/schemas/FileStorageConfigInfo"
@@ -905,6 +907,44 @@ components:
      required:
        - roleArn

+    S3CompatibleStorageConfigInfo:
+      type: object
+      description: s3-compatible storage configuration info (MinIO, Ceph, Dell ECS, Netapp StorageGRID, ...)
+      allOf:
+        - $ref: '#/components/schemas/StorageConfigInfo'
+      properties:
+        s3.endpoint:
+          type: string
+          description: S3 endpoint
+          example: "http[s]://host:port"
+        s3.profileName:
+          type: string
+          description: optional - S3 profile name (credentials) used by this catalog to communicate with S3
+          example: "default or minio-storage-catalog-1 or ceph-storage-catalog-2"
+        s3.credentials.catalog.accessKeyEnvVar:
+          type: string
+          description: optional - environment variable name for the 'ACCESS_KEY_ID' used by this catalog to communicate with S3
+          example: "CATALOG_1_ACCESS_KEY_ENV_VARIABLE_NAME or AWS_ACCESS_KEY_ID"
+        s3.credentials.catalog.secretAccessKeyEnvVar:
+          type: string
+          description: optional - environment variable name for the 'SECRET_ACCESS_KEY' used by this catalog to communicate with S3
+          example: "CATALOG_1_SECRET_KEY_ENV_VARIABLE_NAME or AWS_SECRET_ACCESS_KEY"
+        s3.pathStyleAccess:
+          type: boolean
+          description: optional - whether or not to use path-style access
+          default: false
+        s3.region:
+          type: string
+          description: optional - s3 region where data is stored
+          example: "rack-1 or us-east-1"
+        s3.roleArn:
+          type: string
+          description: optional - s3 role arn, used with assumeRole to obtain temporary credentials from a Security Token Service
+          pattern: '^([ua]rn:\S*:\S*:\S*:\S*:\S*).*$'
+          example: "arn:aws:iam::123456789001:principal/abc1-b-self1234 or urn:ecs:iam::namespace:user/role"
+      required:
+        - s3.endpoint
+
     AzureStorageConfigInfo:
       type: object
       description: azure storage configuration info
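
For reference, a minimal catalog-creation request using the new S3_COMPATIBLE storage type could look like the sketch below. It mirrors the call in t_spark_sql/src/spark_sql_minio.sh; the token, catalog name, and endpoint are illustrative, and the named environment variables must be visible to the Polaris server process (as wired up in regtests/docker-compose.yml):

```shell
# Sketch only: TOKEN, minio_catalog, and the endpoint are placeholder values.
curl -X POST \
  -H "Authorization: Bearer ${TOKEN}" \
  -H 'Content-Type: application/json' \
  http://localhost:8181/api/management/v1/catalogs \
  -d '{
    "name": "minio_catalog",
    "type": "INTERNAL",
    "properties": {"default-base-location": "s3://warehouse/polaris"},
    "storageConfigInfo": {
      "storageType": "S3_COMPATIBLE",
      "allowedLocations": ["s3://warehouse/polaris"],
      "s3.endpoint": "http://localhost:9000",
      "s3.pathStyleAccess": true,
      "s3.credentials.catalog.accessKeyEnvVar": "MINIO_S3_CATALOG_1_ID",
      "s3.credentials.catalog.secretAccessKeyEnvVar": "MINIO_S3_CATALOG_1_SECRET"
    }
  }'
```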