From f2bf08cae137ca1335cee0c4f4b2feda97a64db0 Mon Sep 17 00:00:00 2001 From: ahmarsuhail Date: Thu, 24 Nov 2022 11:55:06 +0000 Subject: [PATCH 01/13] HADOOP-18073. Upgrade AWS SDK to v2 in S3A [work in progress] See aws_sdk_v2_changelog.md for details. Co-authored-by: Ahmar Suhail Co-authored-by: Alessandro Passaro --- hadoop-project/pom.xml | 23 + hadoop-tools/hadoop-aws/pom.xml | 10 + .../hadoop/fs/s3a/AWSBadRequestException.java | 4 +- .../apache/hadoop/fs/s3a/AWSCannedACL.java | 40 + .../apache/hadoop/fs/s3a/AWSClientConfig.java | 340 ++++++++ .../hadoop/fs/s3a/AWSClientIOException.java | 13 +- .../fs/s3a/AWSCredentialProviderList.java | 99 ++- .../hadoop/fs/s3a/AWSNoResponseException.java | 4 +- .../hadoop/fs/s3a/AWSRedirectException.java | 4 +- .../hadoop/fs/s3a/AWSS3IOException.java | 25 +- .../hadoop/fs/s3a/AWSServiceIOException.java | 37 +- .../fs/s3a/AWSServiceThrottledException.java | 4 +- .../hadoop/fs/s3a/AWSStatus500Exception.java | 4 +- .../s3a/AnonymousAWSCredentialsProvider.java | 18 +- .../org/apache/hadoop/fs/s3a/ArnResource.java | 12 +- .../org/apache/hadoop/fs/s3a/Constants.java | 6 +- .../CredentialInitializationException.java | 17 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 224 ++++- .../hadoop/fs/s3a/FailureInjectionPolicy.java | 2 +- .../fs/s3a/InconsistentAmazonS3Client.java | 345 -------- .../fs/s3a/InconsistentS3ClientFactory.java | 84 +- .../org/apache/hadoop/fs/s3a/Invoker.java | 19 +- .../org/apache/hadoop/fs/s3a/Listing.java | 53 +- ...t.java => MultiObjectDeleteException.java} | 68 +- .../apache/hadoop/fs/s3a/MultipartUtils.java | 49 +- .../fs/s3a/ProgressableProgressListener.java | 48 +- .../hadoop/fs/s3a/S3ABlockOutputStream.java | 92 +- .../apache/hadoop/fs/s3a/S3ADataBlocks.java | 8 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 808 +++++++++--------- .../apache/hadoop/fs/s3a/S3AInputStream.java | 116 ++- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 14 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 361 +++++--- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 68 +- .../apache/hadoop/fs/s3a/S3ListRequest.java | 16 +- .../apache/hadoop/fs/s3a/S3ListResult.java | 61 +- .../hadoop/fs/s3a/S3ObjectAttributes.java | 27 - .../s3a/SharedInstanceCredentialProvider.java | 1 - .../fs/s3a/SimpleAWSCredentialsProvider.java | 19 +- .../s3a/TemporaryAWSCredentialsProvider.java | 7 +- .../org/apache/hadoop/fs/s3a/UploadInfo.java | 12 +- .../hadoop/fs/s3a/WriteOperationHelper.java | 200 ++--- .../apache/hadoop/fs/s3a/WriteOperations.java | 100 +-- .../V1ToV2AwsCredentialProviderAdapter.java | 75 ++ .../V1V2AwsCredentialProviderAdapter.java | 36 + .../hadoop/fs/s3a/adapter/package-info.java | 27 + .../hadoop/fs/s3a/api/RequestFactory.java | 211 ++--- .../fs/s3a/audit/AWSAuditEventCallbacks.java | 105 +-- .../fs/s3a/audit/AWSRequestAnalyzer.java | 135 +-- .../hadoop/fs/s3a/audit/AuditIntegration.java | 28 +- .../hadoop/fs/s3a/audit/AuditManagerS3A.java | 22 +- .../fs/s3a/audit/S3AAuditConstants.java | 13 +- .../s3a/audit/impl/ActiveAuditManagerS3A.java | 410 +++++---- .../fs/s3a/audit/impl/LoggingAuditor.java | 116 +-- .../s3a/audit/impl/NoopAuditManagerS3A.java | 17 +- .../audit/impl/S3AInternalAuditConstants.java | 16 +- .../auth/AbstractAWSCredentialProvider.java | 13 +- .../AbstractSessionCredentialsProvider.java | 28 +- .../auth/AssumedRoleCredentialProvider.java | 66 +- .../auth/IAMInstanceCredentialsProvider.java | 40 +- .../s3a/auth/MarshalledCredentialBinding.java | 56 +- .../auth/MarshalledCredentialProvider.java | 5 +- 
.../fs/s3a/auth/NoAuthWithAWSException.java | 2 +- .../hadoop/fs/s3a/auth/STSClientFactory.java | 135 +-- .../EncryptionSecretOperations.java | 40 +- .../s3a/auth/delegation/RoleTokenBinding.java | 2 +- .../auth/delegation/S3ADelegationTokens.java | 5 +- .../auth/delegation/SessionTokenBinding.java | 34 +- .../fs/s3a/commit/AbstractS3ACommitter.java | 9 +- .../hadoop/fs/s3a/commit/PutTracker.java | 4 +- .../s3a/commit/files/SinglePendingCommit.java | 13 +- .../fs/s3a/commit/impl/CommitOperations.java | 54 +- .../s3a/commit/magic/MagicCommitTracker.java | 28 +- .../fs/s3a/impl/BulkDeleteRetryHandler.java | 12 +- .../fs/s3a/impl/ChangeDetectionPolicy.java | 129 +-- .../hadoop/fs/s3a/impl/ChangeTracker.java | 87 +- .../hadoop/fs/s3a/impl/CopyOutcome.java | 80 -- .../hadoop/fs/s3a/impl/DeleteOperation.java | 17 +- .../hadoop/fs/s3a/impl/ErrorTranslation.java | 14 +- .../hadoop/fs/s3a/impl/HeaderProcessing.java | 141 ++- .../hadoop/fs/s3a/impl/InternalConstants.java | 47 +- .../fs/s3a/impl/OperationCallbacks.java | 16 +- .../hadoop/fs/s3a/impl/RenameOperation.java | 24 +- .../fs/s3a/impl/RequestFactoryImpl.java | 657 +++++++------- .../fs/s3a/impl/S3AMultipartUploader.java | 31 +- .../hadoop/fs/s3a/impl/SDKStreamDrainer.java | 43 +- .../hadoop/fs/s3a/impl/V2Migration.java | 12 + .../fs/s3a/prefetch/S3ARemoteObject.java | 54 +- .../s3a/prefetch/S3ARemoteObjectReader.java | 7 +- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 10 +- .../fs/s3a/select/BlockingEnumeration.java | 151 ++++ .../hadoop/fs/s3a/select/SelectBinding.java | 127 ++- .../select/SelectEventStreamPublisher.java | 124 +++ .../fs/s3a/select/SelectInputStream.java | 33 +- .../s3a/select/SelectObjectContentHelper.java | 111 +++ .../impl/AwsStatisticsCollector.java | 166 ++-- .../hadoop/fs/s3a/tools/MarkerTool.java | 17 +- .../fs/s3a/tools/MarkerToolOperations.java | 14 +- .../s3a/tools/MarkerToolOperationsImpl.java | 12 +- .../tools/hadoop-aws/aws_sdk_v2_changelog.md | 340 ++++++++ .../site/markdown/tools/hadoop-aws/testing.md | 22 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 29 +- .../hadoop/fs/s3a/EncryptionTestUtils.java | 20 +- .../s3a/ITestS3AAWSCredentialsProvider.java | 25 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 35 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 60 +- .../ITestS3AEncryptionSSEKMSDefaultKey.java | 8 +- ...estS3AEncryptionWithDefaultS3Settings.java | 2 +- .../fs/s3a/ITestS3AFailureHandling.java | 28 +- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 33 +- .../hadoop/fs/s3a/ITestS3AMultipartUtils.java | 5 +- .../hadoop/fs/s3a/ITestS3ARequesterPays.java | 2 +- .../hadoop/fs/s3a/ITestS3AStorageClass.java | 1 - .../fs/s3a/ITestS3ATemporaryCredentials.java | 26 +- .../hadoop/fs/s3a/MockS3AFileSystem.java | 17 +- .../hadoop/fs/s3a/MockS3ClientFactory.java | 23 +- .../hadoop/fs/s3a/MultipartTestUtils.java | 36 +- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 10 +- .../apache/hadoop/fs/s3a/TestArnResource.java | 9 +- .../org/apache/hadoop/fs/s3a/TestInvoker.java | 105 ++- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 30 +- .../fs/s3a/TestS3ABlockOutputStream.java | 11 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 26 +- .../fs/s3a/TestS3AExceptionTranslation.java | 90 +- .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 99 +-- .../fs/s3a/TestS3AInputStreamRetry.java | 130 ++- .../apache/hadoop/fs/s3a/TestS3AUnbuffer.java | 51 +- .../fs/s3a/TestStreamChangeTracker.java | 107 ++- .../fs/s3a/audit/AbstractAuditingTest.java | 30 +- .../hadoop/fs/s3a/audit/AuditTestSupport.java | 4 +- 
.../fs/s3a/audit/ITestAuditManager.java | 10 +- ...ava => SimpleAWSExecutionInterceptor.java} | 20 +- .../fs/s3a/audit/TestAuditIntegration.java | 83 +- .../fs/s3a/audit/TestAuditSpanLifecycle.java | 11 +- .../audit/TestHttpReferrerAuditHeader.java | 17 +- .../fs/s3a/audit/TestLoggingAuditor.java | 41 +- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 24 +- .../hadoop/fs/s3a/auth/RoleTestUtils.java | 1 - .../s3a/auth/TestMarshalledCredentials.java | 10 +- .../delegation/CountInvocationsProvider.java | 13 +- .../ITestSessionDelegationInFilesystem.java | 24 +- .../ITestSessionDelegationTokens.java | 24 +- .../TestS3ADelegationTokenSupport.java | 1 - .../s3a/commit/AbstractITCommitProtocol.java | 2 +- .../s3a/commit/staging/StagingTestBase.java | 178 ++-- .../staging/TestDirectoryCommitterScale.java | 8 +- .../commit/staging/TestStagingCommitter.java | 31 +- .../TestStagingPartitionedTaskCommit.java | 9 +- .../s3a/impl/ITestPartialRenamesDeletes.java | 2 +- .../fs/s3a/impl/ITestRenameDeleteRace.java | 6 +- .../hadoop/fs/s3a/impl/ITestXAttrCost.java | 3 + .../fs/s3a/impl/TestHeaderProcessing.java | 26 +- .../fs/s3a/impl/TestRequestFactory.java | 107 ++- .../fs/s3a/impl/TestSDKStreamDrainer.java | 16 +- .../ITestDirectoryMarkerListing.java | 25 +- .../fs/s3a/prefetch/MockS3ARemoteObject.java | 29 +- .../fs/s3a/prefetch/S3APrefetchFakes.java | 49 +- .../ILoadTestS3ABulkDeleteThrottling.java | 13 +- .../scale/ITestS3ADirectoryPerformance.java | 23 +- .../scale/ITestS3AHugeFilesStorageClass.java | 2 +- .../fs/s3a/select/AbstractS3SelectTest.java | 4 +- .../hadoop/fs/s3a/test/ExtraAssertions.java | 2 +- .../s3a/test/MinimalOperationCallbacks.java | 14 +- .../MinimalWriteOperationHelperCallbacks.java | 16 +- .../org.mockito.plugins.MockMaker | 13 + 164 files changed, 5492 insertions(+), 4122 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/{impl/MultiObjectDeleteSupport.java => MultiObjectDeleteException.java} (61%) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java create mode 100644 hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md rename hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/{SimpleAWSRequestHandler.java => SimpleAWSExecutionInterceptor.java} (68%) create mode 100644 hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 
c4dfd2f9d7c11..9ff558e23773a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -185,6 +185,7 @@ 900 1.12.316 2.7.1 + 2.18.19 1.11.2 2.1 0.7 @@ -1138,6 +1139,28 @@ + + software.amazon.awssdk + bundle + ${aws-java-sdk-v2.version} + + + io.netty + * + + + + + software.amazon.awssdk + s3-transfer-manager + ${aws-java-sdk-v2.version}-PREVIEW + + + io.netty + * + + + org.apache.mina mina-core diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 3bd973567c115..89fa02e4c191c 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -499,6 +499,16 @@ aws-java-sdk-bundle compile + + software.amazon.awssdk + bundle + compile + + + software.amazon.awssdk + s3-transfer-manager + compile + org.assertj assertj-core diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java index 482c5a1db7a1f..c5867eeab4f4d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSBadRequestException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * A 400 "Bad Request" exception was received. @@ -36,7 +36,7 @@ public class AWSBadRequestException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSBadRequestException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java new file mode 100644 index 0000000000000..ac1e5f412b86d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +/** + * Enum to map AWS SDK V1 Acl values to SDK V2. 
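+ * For example, the V1 name {@code PublicRead} maps to the value "PUBLIC_READ", matching the SDK V2 {@code software.amazon.awssdk.services.s3.model.ObjectCannedACL} constant of the same name.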
+ */ +public enum AWSCannedACL { + Private("PRIVATE"), + PublicRead("PUBLIC_READ"), + PublicReadWrite("PUBLIC_READ_WRITE"), + AuthenticatedRead("AUTHENTICATED_READ"), + AwsExecRead("AWS_EXEC_READ"), + BucketOwnerRead("BUCKET_OWNER_READ"), + BucketOwnerFullControl("BUCKET_OWNER_FULL_CONTROL"); + + private final String value; + + AWSCannedACL(String value){ + this.value = value; + } + + public String toString() { return this.value; } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java new file mode 100644 index 0000000000000..00f5a9fbf5d4d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.VersionInfo; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_DOMAIN; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION; +import static 
org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; + +/** + * Methods for configuring the S3 client. + * These methods are used when creating and configuring + * {@link software.amazon.awssdk.services.s3.S3Client} which communicates with the S3 service. + */ +public final class AWSClientConfig { + private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class); + + private AWSClientConfig() { + } + + public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { + ClientOverrideConfiguration.Builder overrideConfigBuilder = + ClientOverrideConfiguration.builder(); + + initRequestTimeout(conf, overrideConfigBuilder); + + initUserAgent(conf, overrideConfigBuilder); + + // TODO: Look at signers. See issue https://github.com/aws/aws-sdk-java-v2/issues/1024 + // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); + // if (!signerOverride.isEmpty()) { + // LOG.debug("Signer override = {}", signerOverride); + // overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER) + // } + + return overrideConfigBuilder; + } + + /** + * Configures the http client. + * + * @param conf The Hadoop configuration + * @return Http client builder + */ + public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { + ApacheHttpClient.Builder httpClientBuilder = + ApacheHttpClient.builder(); + + httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + int connectionEstablishTimeout = + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0); + int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); + httpClientBuilder.socketTimeout(Duration.ofSeconds(socketTimeout)); + + // TODO: Need to set ssl socket factory, as done in + // NetworkBinding.bindSSLChannelMode(conf, awsConf); + + return httpClientBuilder; + } + + /** + * Configures the async http client. + * + * @param conf The Hadoop configuration + * @return Http client builder + */ + public static NettyNioAsyncHttpClient.Builder createAsyncHttpClientBuilder(Configuration conf) { + NettyNioAsyncHttpClient.Builder httpClientBuilder = + NettyNioAsyncHttpClient.builder(); + + httpClientBuilder.maxConcurrency(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + int connectionEstablishTimeout = + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0); + int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); + httpClientBuilder.readTimeout(Duration.ofSeconds(socketTimeout)); + httpClientBuilder.writeTimeout(Duration.ofSeconds(socketTimeout)); + + // TODO: Need to set ssl socket factory, as done in + // NetworkBinding.bindSSLChannelMode(conf, awsConf); + + return httpClientBuilder; + } + + /** + * Configures the retry policy. 
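+ * Only the retry count is set here, from {@code fs.s3a.attempts.maximum} ({@code MAX_ERROR_RETRIES}); the SDK's default backoff strategy is left in place.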
+ * + * @param conf The Hadoop configuration + * @return Retry policy builder + */ + public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { + + RetryPolicy.Builder retryPolicyBuilder = RetryPolicy.builder(); + + retryPolicyBuilder.numRetries(S3AUtils.intOption(conf, MAX_ERROR_RETRIES, + DEFAULT_MAX_ERROR_RETRIES, 0)); + + return retryPolicyBuilder; + } + + /** + * Configures the proxy. + * + * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return Proxy configuration + * @throws IOException on any IO problem + */ + public static ProxyConfiguration createProxyConfiguration(Configuration conf, + String bucket) throws IOException { + + ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder(); + + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + + if (!proxyHost.isEmpty()) { + if (proxyPort >= 0) { + proxyConfigBuilder.endpoint(buildURI(proxyHost, proxyPort)); + } else { + if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + LOG.warn("Proxy host set without port. Using HTTPS default 443"); + proxyConfigBuilder.endpoint(buildURI(proxyHost, 443)); + } else { + LOG.warn("Proxy host set without port. Using HTTP default 80"); + proxyConfigBuilder.endpoint(buildURI(proxyHost, 80)); + } + } + final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + proxyConfigBuilder.username(proxyUsername); + proxyConfigBuilder.password(proxyPassword); + proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN)); + proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + if (LOG.isDebugEnabled()) { + LOG.debug("Using proxy server {}:{} as user {} with password {} on " + + "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword, + PROXY_DOMAIN, PROXY_WORKSTATION); + } + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + return proxyConfigBuilder.build(); + } + + /** + * Configures the proxy for the async http client. + * + * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return Proxy configuration + * @throws IOException on any IO problem + */ + public static software.amazon.awssdk.http.nio.netty.ProxyConfiguration + createAsyncProxyConfiguration(Configuration conf, + String bucket) throws IOException { + + software.amazon.awssdk.http.nio.netty.ProxyConfiguration.Builder proxyConfigBuilder = + software.amazon.awssdk.http.nio.netty.ProxyConfiguration.builder(); + + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + + if (!proxyHost.isEmpty()) { + if (proxyPort >= 0) { + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(proxyPort); + } else { + if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + LOG.warn("Proxy host set without port. 
Using HTTPS default 443"); + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(443); + } else { + LOG.warn("Proxy host set without port. Using HTTP default 80"); + proxyConfigBuilder.host(proxyHost); + proxyConfigBuilder.port(80); + } + } + final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + proxyConfigBuilder.username(proxyUsername); + proxyConfigBuilder.password(proxyPassword); + // TODO: check NTLM support + // proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN)); + // proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + if (LOG.isDebugEnabled()) { + LOG.debug("Using proxy server {}:{} as user {} with password {} on " + + "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword, + PROXY_DOMAIN, PROXY_WORKSTATION); + } + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } else { + return null; + } + + return proxyConfigBuilder.build(); + } + + /*** + * Builds a URI, throws an IllegalArgumentException in case of errors. + * + * @param host proxy host + * @param port proxy port + * @return uri with host and port + */ + private static URI buildURI(String host, int port) { + try { + return new URIBuilder().setHost(host).setPort(port).build(); + } catch (URISyntaxException e) { + String msg = + "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + } + + /** + * Initializes the User-Agent header to send in HTTP requests to AWS + * services. We always include the Hadoop version number. The user also + * may set an optional custom prefix to put in front of the Hadoop version + * number. The AWS SDK internally appends its own information, which seems + * to include the AWS SDK version, OS and JVM version. + * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initUserAgent(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + String userAgent = "Hadoop " + VersionInfo.getVersion(); + String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); + if (!userAgentPrefix.isEmpty()) { + userAgent = userAgentPrefix + ", " + userAgent; + } + LOG.debug("Using User-Agent: {}", userAgent); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); + } + + /** + * Configures request timeout. + * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initRequestTimeout(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + + if (requestTimeoutMillis > Integer.MAX_VALUE) { + LOG.debug("Request timeout is too high({} ms). 
Setting to {} ms instead", + requestTimeoutMillis, Integer.MAX_VALUE); + requestTimeoutMillis = Integer.MAX_VALUE; + } + + if(requestTimeoutMillis > 0) { + clientConfig.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java index d3c5f888c7370..377ffe9b7b56d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java @@ -18,34 +18,33 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; import org.apache.hadoop.util.Preconditions; import java.io.IOException; +import software.amazon.awssdk.core.exception.SdkException; + /** - * IOException equivalent of an {@link AmazonClientException}. + * IOException equivalent of an {@link SdkException}. */ public class AWSClientIOException extends IOException { private final String operation; public AWSClientIOException(String operation, - SdkBaseException cause) { + SdkException cause) { super(cause); Preconditions.checkArgument(operation != null, "Null 'operation' argument"); Preconditions.checkArgument(cause != null, "Null 'cause' argument"); this.operation = operation; } - public AmazonClientException getCause() { - return (AmazonClientException) super.getCause(); + public SdkException getCause() { + return (SdkException) super.getCause(); } @Override public String getMessage() { return operation + ": " + getCause().getMessage(); } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index f4d0a8d091249..228a9b8bd4667 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -27,12 +27,14 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.util.Preconditions; + +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.BasicSessionCredentials; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +45,12 @@ import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException; import org.apache.hadoop.io.IOUtils; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.core.exception.SdkException; + /** * A list of providers. * @@ -51,17 +59,17 @@ *
* <ol>
*   <li>Allows extra providers to be added dynamically.</li>
*   <li>If any provider in the chain throws an exception other than - * an {@link AmazonClientException}, that is rethrown, rather than + * an {@link SdkException}, that is rethrown, rather than * swallowed.</li>
*   <li>Has some more diagnostics.</li>
- *   <li>On failure, the last "relevant" AmazonClientException raised is + *   <li>On failure, the last "relevant" {@link SdkException} raised is * rethrown; exceptions other than 'no credentials' have priority.</li>
- *   <li>Special handling of {@link AnonymousAWSCredentials}.</li> + *   <li>Special handling of {@link AnonymousCredentialsProvider}.</li>
* </ol>
*/ @InterfaceAudience.Private @InterfaceStability.Evolving -public final class AWSCredentialProviderList implements AWSCredentialsProvider, +public final class AWSCredentialProviderList implements AwsCredentialsProvider, AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger( @@ -73,9 +81,9 @@ public final class AWSCredentialProviderList implements AWSCredentialsProvider, CREDENTIALS_REQUESTED_WHEN_CLOSED = "Credentials requested after provider list was closed"; - private final List providers = new ArrayList<>(1); + private final List providers = new ArrayList<>(1); private boolean reuseLastProvider = true; - private AWSCredentialsProvider lastProvider; + private AwsCredentialsProvider lastProvider; private final AtomicInteger refCount = new AtomicInteger(1); @@ -99,7 +107,9 @@ public AWSCredentialProviderList() { */ public AWSCredentialProviderList( Collection providers) { - this.providers.addAll(providers); + for (AWSCredentialsProvider provider: providers) { + this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); + } } /** @@ -110,6 +120,19 @@ public AWSCredentialProviderList( public AWSCredentialProviderList(final String name, final AWSCredentialsProvider... providerArgs) { setName(name); + for (AWSCredentialsProvider provider: providerArgs) { + this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); + } + } + + /** + * Create with an initial list of SDK V2 credential providers. + * @param name name for error messages, may be "" + * @param providerArgs provider list. + */ + public AWSCredentialProviderList(final String name, + final AwsCredentialsProvider... providerArgs) { + setName(name); Collections.addAll(providers, providerArgs); } @@ -127,12 +150,21 @@ public void setName(final String name) { /** * Add a new provider. - * @param p provider + * @param provider provider */ - public void add(AWSCredentialsProvider p) { - providers.add(p); + public void add(AWSCredentialsProvider provider) { + providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); } + /** + * Add a new SDK V2 provider. + * @param provider provider + */ + public void add(AwsCredentialsProvider provider) { + providers.add(provider); + } + + /** * Add all providers from another list to this one. * @param other the other list. @@ -142,15 +174,18 @@ public void addAll(AWSCredentialProviderList other) { } /** - * Refresh all child entries. + * This method will get credentials using SDK V2's resolveCredentials and then convert it into + * V1 credentials. This required by delegation token binding classes. + * @return SDK V1 credentials */ - @Override - public void refresh() { - if (isClosed()) { - return; - } - for (AWSCredentialsProvider provider : providers) { - provider.refresh(); + public AWSCredentials getCredentials() { + AwsCredentials credentials = resolveCredentials(); + if (credentials instanceof AwsSessionCredentials) { + return new BasicSessionCredentials(credentials.accessKeyId(), + credentials.secretAccessKey(), + ((AwsSessionCredentials) credentials).sessionToken()); + } else { + return new BasicAWSCredentials(credentials.accessKeyId(), credentials.secretAccessKey()); } } @@ -160,7 +195,7 @@ public void refresh() { * @return a set of credentials (possibly anonymous), for authenticating. 
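* Providers are queried in order; the first to return usable credentials is remembered and reused until the list is closed.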
*/ @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { if (isClosed()) { LOG.warn(CREDENTIALS_REQUESTED_WHEN_CLOSED); throw new NoAuthWithAWSException(name + @@ -168,18 +203,18 @@ public AWSCredentials getCredentials() { } checkNotEmpty(); if (reuseLastProvider && lastProvider != null) { - return lastProvider.getCredentials(); + return lastProvider.resolveCredentials(); } - AmazonClientException lastException = null; - for (AWSCredentialsProvider provider : providers) { + SdkException lastException = null; + for (AwsCredentialsProvider provider : providers) { try { - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); Preconditions.checkNotNull(credentials, "Null credentials returned by %s", provider); - if ((credentials.getAWSAccessKeyId() != null && - credentials.getAWSSecretKey() != null) - || (credentials instanceof AnonymousAWSCredentials)) { + if ((credentials.accessKeyId() != null && credentials.secretAccessKey() != null) || ( + provider instanceof AnonymousCredentialsProvider + || provider instanceof AnonymousAWSCredentialsProvider)) { lastProvider = provider; LOG.debug("Using credentials from {}", provider); return credentials; @@ -196,7 +231,7 @@ public AWSCredentials getCredentials() { } LOG.debug("No credentials from {}: {}", provider, e.toString()); - } catch (AmazonClientException e) { + } catch (SdkException e) { lastException = e; LOG.debug("No credentials provided by {}: {}", provider, e.toString(), e); @@ -223,13 +258,13 @@ public AWSCredentials getCredentials() { * @return providers */ @VisibleForTesting - List getProviders() { + List getProviders() { return providers; } /** * Verify that the provider list is not empty. - * @throws AmazonClientException if there are no providers. + * @throws SdkException if there are no providers. */ public void checkNotEmpty() { if (providers.isEmpty()) { @@ -317,7 +352,7 @@ public void close() { } // do this outside the synchronized block. - for (AWSCredentialsProvider p : providers) { + for (AwsCredentialsProvider p : providers) { if (p instanceof Closeable) { IOUtils.closeStream((Closeable) p); } else if (p instanceof AutoCloseable) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java index e6a23b2361da9..e4adc69bec642 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java @@ -18,14 +18,14 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Status code 443, no response from server. This is considered idempotent. 
*/ public class AWSNoResponseException extends AWSServiceIOException { public AWSNoResponseException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java index bb337ee5eebda..cb478482a8ed4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSRedirectException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Request is redirected. @@ -32,7 +32,7 @@ public class AWSRedirectException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSRedirectException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java index 014d217b6a4fb..de1dd8b4a7a18 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSS3IOException.java @@ -18,14 +18,13 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.AmazonS3Exception; +import software.amazon.awssdk.services.s3.model.S3Exception; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import java.util.Map; - /** - * Wrap a {@link AmazonS3Exception} as an IOE, relaying all + * Wrap a {@link S3Exception} as an IOE, relaying all * getters. */ @InterfaceAudience.Public @@ -38,24 +37,12 @@ public class AWSS3IOException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSS3IOException(String operation, - AmazonS3Exception cause) { + S3Exception cause) { super(operation, cause); } - public AmazonS3Exception getCause() { - return (AmazonS3Exception) super.getCause(); - } - - public String getErrorResponseXml() { - return getCause().getErrorResponseXml(); - } - - public Map getAdditionalDetails() { - return getCause().getAdditionalDetails(); - } - - public String getExtendedRequestId() { - return getCause().getExtendedRequestId(); + public S3Exception getCause() { + return (S3Exception) super.getCause(); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java index a9c2c9840203f..49bb8ec09700d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java @@ -18,13 +18,15 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; + /** * A specific exception from AWS operations. 
- * The exception must always be created with an {@link AmazonServiceException}. + * The exception must always be created with an {@link AwsServiceException}. * The attributes of this exception can all be directly accessed. */ @InterfaceAudience.Public @@ -37,36 +39,31 @@ public class AWSServiceIOException extends AWSClientIOException { * @param cause the underlying cause */ public AWSServiceIOException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } - public AmazonServiceException getCause() { - return (AmazonServiceException) super.getCause(); - } - - public String getRequestId() { - return getCause().getRequestId(); + public AwsServiceException getCause() { + return (AwsServiceException) super.getCause(); } - public String getServiceName() { - return getCause().getServiceName(); + public boolean retryable() { + return getCause().retryable(); } - public String getErrorCode() { - return getCause().getErrorCode(); + public String requestId() { + return getCause().requestId(); } - public int getStatusCode() { - return getCause().getStatusCode(); + public AwsErrorDetails awsErrorDetails() { + return getCause().awsErrorDetails(); } - public String getRawResponseContent() { - return getCause().getRawResponseContent(); + public int statusCode() { + return getCause().statusCode(); } - public boolean isRetryable() { - return getCause().isRetryable(); + public String extendedRequestId() { + return getCause().extendedRequestId(); } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java index 131cea7562242..ba9f1b0ac1f51 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceThrottledException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * Exception raised when a service was throttled. @@ -36,7 +36,7 @@ public class AWSServiceThrottledException extends AWSServiceIOException { * @param cause the underlying cause */ public AWSServiceThrottledException(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java index 83be294fac7cd..1e13690bf9dc8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSStatus500Exception.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * A 500 response came back from a service. 
@@ -31,7 +31,7 @@ */ public class AWSStatus500Exception extends AWSServiceIOException { public AWSStatus500Exception(String operation, - AmazonServiceException cause) { + AwsServiceException cause) { super(operation, cause); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java index 564c03bf731d7..dcfc2a03b1232 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AnonymousAWSCredentialsProvider.java @@ -18,9 +18,10 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -35,23 +36,18 @@ * property fs.s3a.aws.credentials.provider. Therefore, changing the class name * would be a backward-incompatible change. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Private @InterfaceStability.Stable -@Deprecated -public class AnonymousAWSCredentialsProvider implements AWSCredentialsProvider { +public class AnonymousAWSCredentialsProvider implements AwsCredentialsProvider { public static final String NAME = "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"; - public AWSCredentials getCredentials() { - return new AnonymousAWSCredentials(); + public AwsCredentials resolveCredentials() { + return AnonymousCredentialsProvider.create().resolveCredentials(); } - public void refresh() {} - @Override public String toString() { return getClass().getSimpleName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 0294f7722905d..7c0283b1cec67 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -20,7 +20,7 @@ import javax.annotation.Nonnull; -import com.amazonaws.arn.Arn; +import software.amazon.awssdk.arns.Arn; /** * Represents an Arn Resource, this can be an accesspoint or bucket. 
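* For example, an access point ARN such as "arn:aws:s3:eu-west-1:123456789012:accesspoint/sample-ap" (an illustrative value) parses to resource name "sample-ap", account "123456789012" and region "eu-west-1".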
@@ -120,14 +120,14 @@ public String getEndpoint() { public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentException { Arn parsed = Arn.fromString(arn); - if (parsed.getRegion().isEmpty() || parsed.getAccountId().isEmpty() || - parsed.getResourceAsString().isEmpty()) { + if (!parsed.region().isPresent() || !parsed.accountId().isPresent() || + parsed.resourceAsString().isEmpty()) { throw new IllegalArgumentException( String.format("Access Point Arn %s has an invalid format or missing properties", arn)); } - String resourceName = parsed.getResource().getResource(); - return new ArnResource(resourceName, parsed.getAccountId(), parsed.getRegion(), - parsed.getPartition(), arn); + String resourceName = parsed.resource().resource(); + return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(), + parsed.partition(), arn); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index a59a07c84379e..6f4ef6c178e72 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -140,7 +140,6 @@ private Constants() { public static final String ASSUMED_ROLE_POLICY = "fs.s3a.assumed.role.policy"; - @SuppressWarnings("deprecation") public static final String ASSUMED_ROLE_CREDENTIALS_DEFAULT = SimpleAWSCredentialsProvider.NAME; @@ -735,7 +734,6 @@ private Constants() { @InterfaceAudience.Private @InterfaceStability.Unstable - @SuppressWarnings("deprecation") public static final Class DEFAULT_S3_CLIENT_FACTORY_IMPL = DefaultS3ClientFactory.class; @@ -1276,4 +1274,8 @@ private Constants() { public static final String STORE_CAPABILITY_DIRECTORY_MARKER_MULTIPART_UPLOAD_ENABLED = "fs.s3a.capability.multipart.uploads.enabled"; + /** + * The bucket region header. + */ + public static final String BUCKET_REGION_HEADER = "x-amz-bucket-region"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java index 2f0cfd37ad37c..92e8c99bb2962 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonClientException; +import software.amazon.awssdk.core.exception.SdkException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -26,8 +26,8 @@ /** * Exception which Hadoop's AWSCredentialsProvider implementations should * throw when there is a problem with the credential setup. This - * is a subclass of {@link AmazonClientException} which sets - * {@link #isRetryable()} to false, so as to fail fast. + * is a subclass of {@link SdkException} which sets + * {@link #retryable()} to false, so as to fail fast. * This is used in credential providers and elsewhere. * When passed through {@code S3AUtils.translateException()} it * is mapped to an AccessDeniedException. 
As a result, the Invoker @@ -35,13 +35,14 @@ */ @InterfaceAudience.Public @InterfaceStability.Stable -public class CredentialInitializationException extends AmazonClientException { +public class CredentialInitializationException extends SdkException { + public CredentialInitializationException(String message, Throwable t) { - super(message, t); + super(builder().message(message).cause(t)); } public CredentialInitializationException(String message) { - super(message); + super(builder().message(message)); } /** @@ -49,7 +50,5 @@ public CredentialInitializationException(String message) { * @return false, always. */ @Override - public boolean isRetryable() { - return false; - } + public boolean retryable() { return false; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index f724f86e4afcd..af80beae66d09 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -20,11 +20,12 @@ import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.regions.RegionUtils; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Builder; @@ -41,11 +42,28 @@ import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider; import com.amazonaws.util.AwsHostNameUtils; import com.amazonaws.util.RuntimeHttpUtils; + import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3BaseClientBuilder; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -57,12 +75,16 @@ import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; +import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static 
org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; -import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; /** * The default {@link S3ClientFactory} implementation. @@ -71,7 +93,6 @@ */ @InterfaceAudience.Private @InterfaceStability.Unstable -@SuppressWarnings("deprecation") public class DefaultS3ClientFactory extends Configured implements S3ClientFactory { @@ -108,6 +129,7 @@ public class DefaultS3ClientFactory extends Configured * Create the client by preparing the AwsConf configuration * and then invoking {@code buildAmazonS3Client()}. */ + // TODO: Remove this and all code that configures the V1 S3 client. @Override public AmazonS3 createS3Client( final URI uri, @@ -157,10 +179,123 @@ public AmazonS3 createS3Client( } } catch (SdkClientException e) { // SDK refused to build. - throw translateException("creating AWS S3 client", uri.toString(), e); + // TODO: remove? + //throw translateException("creating AWS S3 client", uri.toString(), e); + throw new IOException("creating AWS S3 client: "+ uri.toString(), e); } } + @Override + public S3Client createS3ClientV2( + final URI uri, + final S3ClientCreationParameters parameters) throws IOException { + + Configuration conf = getConf(); + bucket = uri.getHost(); + + ApacheHttpClient.Builder httpClientBuilder = AWSClientConfig + .createHttpClientBuilder(conf) + .proxyConfiguration(AWSClientConfig.createProxyConfiguration(conf, bucket)); + return configureClientBuilder(S3Client.builder(), parameters, conf, bucket) + .httpClientBuilder(httpClientBuilder) + .build(); + } + + @Override + public S3AsyncClient createS3AsyncClient( + final URI uri, + final S3ClientCreationParameters parameters) throws IOException { + + Configuration conf = getConf(); + bucket = uri.getHost(); + NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig + .createAsyncHttpClientBuilder(conf) + .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket)); + return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) + .httpClientBuilder(httpClientBuilder) + .build(); + } + + /** + * Configure a sync or async S3 client builder. + * This method handles all shared configuration. + * @param builder S3 client builder + * @param parameters parameter object + * @param conf configuration object + * @param bucket bucket name + * @return the builder object + * @param S3 client builder type + * @param S3 client type + */ + private , ClientT> + BuilderT configureClientBuilder( + BuilderT builder, + S3ClientCreationParameters parameters, + Configuration conf, + String bucket) { + + URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); + Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket, + parameters.getCredentialSet()); + LOG.debug("Using endpoint {}; and region {}", endpoint, region); + + // TODO: Some configuration done in configureBasicParams is not done yet. + S3Configuration serviceConfiguration = S3Configuration.builder() + .pathStyleAccessEnabled(parameters.isPathStyleAccess()) + // TODO: Review. 
Currently required to pass access point tests in ITestS3ABucketExistence, + // but resolving the region from the access point may be the correct solution. + .useArnRegionEnabled(true) + .build(); + + return builder + .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) + .credentialsProvider(parameters.getCredentialSet()) + .endpointOverride(endpoint) + .region(region) + .serviceConfiguration(serviceConfiguration); + } + + /** + * Create an override configuration for an S3 client. + * @param parameters parameter object + * @param conf configuration object + * @return the override configuration + */ + protected ClientOverrideConfiguration createClientOverrideConfiguration( + S3ClientCreationParameters parameters, Configuration conf) { + final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = + AWSClientConfig.createClientConfigBuilder(conf); + + // add any headers + parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); + + if (parameters.isRequesterPays()) { + // All calls must acknowledge requester will pay via header. + clientOverrideConfigBuilder.putHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE); + } + + if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) { + clientOverrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_SUFFIX, + parameters.getUserAgentSuffix()); + } + + if (parameters.getExecutionInterceptors() != null) { + for (ExecutionInterceptor interceptor : parameters.getExecutionInterceptors()) { + clientOverrideConfigBuilder.addExecutionInterceptor(interceptor); + } + } + + if (parameters.getMetrics() != null) { + clientOverrideConfigBuilder.addMetricPublisher( + new AwsStatisticsCollector(parameters.getMetrics())); + } + + final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); + + return clientOverrideConfigBuilder.build(); + } + /** * Create an {@link AmazonS3} client of type * {@link AmazonS3EncryptionV2} if CSE is enabled. @@ -252,18 +387,11 @@ protected AmazonS3 buildAmazonS3Client( */ private void configureBasicParams(AmazonS3Builder builder, ClientConfiguration awsConf, S3ClientCreationParameters parameters) { - builder.withCredentials(parameters.getCredentialSet()); + // TODO: This whole block will be removed when we remove the V1 client. + // builder.withCredentials(parameters.getCredentialSet()); builder.withClientConfiguration(awsConf); builder.withPathStyleAccessEnabled(parameters.isPathStyleAccess()); - if (parameters.getMetrics() != null) { - builder.withMetricsCollector( - new AwsStatisticsCollector(parameters.getMetrics())); - } - if (parameters.getRequestHandlers() != null) { - builder.withRequestHandlers( - parameters.getRequestHandlers().toArray(new RequestHandler2[0])); - } if (parameters.getMonitoringListener() != null) { builder.withMonitoringListener(parameters.getMonitoringListener()); } @@ -391,4 +519,74 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, endpoint, epr, region); return new AwsClientBuilder.EndpointConfiguration(endpoint, region); } + + /** + * Given an endpoint string, create the endpoint URI. + * + * @param endpoint possibly null endpoint. + * @param conf config to build the URI from.
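// Editor's worked example (not part of the patch), assuming the default
// fs.s3a.connection.ssl.enabled=true and a CENTRAL_ENDPOINT of "s3.amazonaws.com":
//
//   getS3Endpoint(null, conf)                         -> https://s3.amazonaws.com
//   getS3Endpoint("s3.eu-west-2.amazonaws.com", conf) -> https://s3.eu-west-2.amazonaws.com
//   getS3Endpoint("http://localhost:9000", conf)      -> http://localhost:9000 (explicit scheme wins)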
+ * @return an endpoint uri + */ + private static URI getS3Endpoint(String endpoint, final Configuration conf) { + + boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS); + + String protocol = secureConnections ? "https" : "http"; + + if (endpoint == null || endpoint.isEmpty()) { + // the default endpoint + endpoint = CENTRAL_ENDPOINT; + } + + if (!endpoint.contains("://")) { + endpoint = String.format("%s://%s", protocol, endpoint); + } + + try { + return new URI(endpoint); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Get the bucket region. + * + * @param region AWS S3 Region set in the config. This property may not be set, in which case + * ask S3 for the region. + * @param bucket Bucket name. + * @param credentialsProvider Credentials provider to be used with the default s3 client. + * @return region of the bucket. + */ + private static Region getS3Region(String region, String bucket, + AwsCredentialsProvider credentialsProvider) { + + if (!StringUtils.isBlank(region)) { + return Region.of(region); + } + + try { + // build a s3 client with region eu-west-1 that can be used to get the region of the bucket. + // Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com to + // the actual region the bucket is in. As the request is signed with us-east-1 and not the + // bucket's region, it fails. + S3Client s3Client = S3Client.builder().region(Region.EU_WEST_1) + .credentialsProvider(credentialsProvider) + .build(); + + HeadBucketResponse headBucketResponse = + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + return Region.of( + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + List bucketRegion = + exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER); + return Region.of(bucketRegion.get(0)); + } + } + + return Region.US_EAST_1; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java index cfd7046e8abfa..4bf81817efc5d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/FailureInjectionPolicy.java @@ -36,7 +36,7 @@ public class FailureInjectionPolicy { public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME"; private static final Logger LOG = - LoggerFactory.getLogger(InconsistentAmazonS3Client.class); + LoggerFactory.getLogger(FailureInjectionPolicy.class); /** * Probability of throttling a request. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java deleted file mode 100644 index c6d17a32b64b1..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsResult; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -/** - * A wrapper around {@link com.amazonaws.services.s3.AmazonS3} that injects - * failures. - * It used to also inject inconsistency, but this was removed with S3Guard; - * what is retained is the ability to throttle AWS operations and for the - * input stream to be inconsistent. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class InconsistentAmazonS3Client extends AmazonS3Client { - - private static final Logger LOG = - LoggerFactory.getLogger(InconsistentAmazonS3Client.class); - - private FailureInjectionPolicy policy; - - /** - * Counter of failures since last reset. - */ - private final AtomicLong failureCounter = new AtomicLong(0); - - - /** - * Instantiate. - * This subclasses a deprecated constructor of the parent - * {@code AmazonS3Client} class; we can't use the builder API because, - * that only creates the consistent client. - * @param credentials credentials to auth. - * @param clientConfiguration connection settings - * @param conf hadoop configuration. 
- */ - @SuppressWarnings("deprecation") - public InconsistentAmazonS3Client(AWSCredentialsProvider credentials, - ClientConfiguration clientConfiguration, Configuration conf) { - super(credentials, clientConfiguration); - policy = new FailureInjectionPolicy(conf); - } - - /** - * A way for tests to patch in a different fault injection policy at runtime. - * @param fs filesystem under test - * @param policy failure injection settings to set - * @throws Exception on failure - */ - public static void setFailureInjectionPolicy(S3AFileSystem fs, - FailureInjectionPolicy policy) throws Exception { - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("s3guard"); - InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3); - ic.replacePolicy(policy); - } - - private void replacePolicy(FailureInjectionPolicy pol) { - this.policy = pol; - } - - @Override - public String toString() { - return String.format("Inconsistent S3 Client: %s; failure count %d", - policy, failureCounter.get()); - } - - /** - * Convenience function for test code to cast from supertype. - * @param c supertype to cast from - * @return subtype, not null - * @throws Exception on error - */ - public static InconsistentAmazonS3Client castFrom(AmazonS3 c) throws - Exception { - InconsistentAmazonS3Client ic = null; - if (c instanceof InconsistentAmazonS3Client) { - ic = (InconsistentAmazonS3Client) c; - } - Preconditions.checkNotNull(ic, "Not an instance of " + - "InconsistentAmazonS3Client"); - return ic; - } - - @Override - public DeleteObjectsResult deleteObjects(DeleteObjectsRequest - deleteObjectsRequest) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.deleteObjects(deleteObjectsRequest); - } - - @Override - public void deleteObject(DeleteObjectRequest deleteObjectRequest) - throws AmazonClientException, AmazonServiceException { - String key = deleteObjectRequest.getKey(); - LOG.debug("key {}", key); - maybeFail(); - super.deleteObject(deleteObjectRequest); - } - - /* We should only need to override this version of putObject() */ - @Override - public PutObjectResult putObject(PutObjectRequest putObjectRequest) - throws AmazonClientException, AmazonServiceException { - LOG.debug("key {}", putObjectRequest.getKey()); - maybeFail(); - return super.putObject(putObjectRequest); - } - - /* We should only need to override these versions of listObjects() */ - @Override - public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.listObjects(listObjectsRequest); - } - - /* consistent listing with possibility of failing. 
*/ - @Override - public ListObjectsV2Result listObjectsV2(ListObjectsV2Request request) - throws AmazonClientException, AmazonServiceException { - maybeFail(); - return super.listObjectsV2(request); - } - - - @Override - public CompleteMultipartUploadResult completeMultipartUpload( - CompleteMultipartUploadRequest completeMultipartUploadRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.completeMultipartUpload(completeMultipartUploadRequest); - } - - @Override - public UploadPartResult uploadPart(UploadPartRequest uploadPartRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.uploadPart(uploadPartRequest); - } - - @Override - public InitiateMultipartUploadResult initiateMultipartUpload( - InitiateMultipartUploadRequest initiateMultipartUploadRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.initiateMultipartUpload(initiateMultipartUploadRequest); - } - - @Override - public MultipartUploadListing listMultipartUploads( - ListMultipartUploadsRequest listMultipartUploadsRequest) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.listMultipartUploads(listMultipartUploadsRequest); - } - - /** - * Set the probability of throttling a request. - * @param throttleProbability the probability of a request being throttled. - */ - public void setThrottleProbability(float throttleProbability) { - policy.setThrottleProbability(throttleProbability); - } - - /** - * Conditionally fail the operation. - * @param errorMsg description of failure - * @param statusCode http status code for error - * @throws AmazonClientException if the client chooses to fail - * the request. - */ - private void maybeFail(String errorMsg, int statusCode) - throws AmazonClientException { - // code structure here is to line up for more failures later - AmazonServiceException ex = null; - if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) { - // throttle the request - ex = new AmazonServiceException(errorMsg - + " count = " + (failureCounter.get() + 1), null); - ex.setStatusCode(statusCode); - } - - int failureLimit = policy.getFailureLimit(); - if (ex != null) { - long count = failureCounter.incrementAndGet(); - if (failureLimit == 0 - || (failureLimit > 0 && count < failureLimit)) { - throw ex; - } - } - } - - private void maybeFail() { - maybeFail("throttled", 503); - } - - /** - * Set the limit on failures before all operations pass through. - * This resets the failure count. - * @param limit limit; "0" means "no limit" - */ - public void setFailureLimit(int limit) { - policy.setFailureLimit(limit); - failureCounter.set(0); - } - - @Override - public S3Object getObject(GetObjectRequest var1) throws SdkClientException, - AmazonServiceException { - maybeFail(); - return super.getObject(var1); - } - - @Override - public S3Object getObject(String bucketName, String key) - throws SdkClientException, AmazonServiceException { - maybeFail(); - return super.getObject(bucketName, key); - - } - - /** Since ObjectListing is immutable, we just override it with wrapper. 
*/ - @SuppressWarnings("serial") - private static class CustomObjectListing extends ObjectListing { - - private final List customListing; - private final List customPrefixes; - - CustomObjectListing(ObjectListing rawListing, - List customListing, - List customPrefixes) { - super(); - this.customListing = customListing; - this.customPrefixes = customPrefixes; - - this.setBucketName(rawListing.getBucketName()); - this.setCommonPrefixes(rawListing.getCommonPrefixes()); - this.setDelimiter(rawListing.getDelimiter()); - this.setEncodingType(rawListing.getEncodingType()); - this.setMarker(rawListing.getMarker()); - this.setMaxKeys(rawListing.getMaxKeys()); - this.setNextMarker(rawListing.getNextMarker()); - this.setPrefix(rawListing.getPrefix()); - this.setTruncated(rawListing.isTruncated()); - } - - @Override - public List getObjectSummaries() { - return customListing; - } - - @Override - public List getCommonPrefixes() { - return customPrefixes; - } - } - - @SuppressWarnings("serial") - private static class CustomListObjectsV2Result extends ListObjectsV2Result { - - private final List customListing; - private final List customPrefixes; - - CustomListObjectsV2Result(ListObjectsV2Result raw, - List customListing, List customPrefixes) { - super(); - this.customListing = customListing; - this.customPrefixes = customPrefixes; - - this.setBucketName(raw.getBucketName()); - this.setCommonPrefixes(raw.getCommonPrefixes()); - this.setDelimiter(raw.getDelimiter()); - this.setEncodingType(raw.getEncodingType()); - this.setStartAfter(raw.getStartAfter()); - this.setMaxKeys(raw.getMaxKeys()); - this.setContinuationToken(raw.getContinuationToken()); - this.setPrefix(raw.getPrefix()); - this.setTruncated(raw.isTruncated()); - } - - @Override - public List getObjectSummaries() { - return customListing; - } - - @Override - public List getCommonPrefixes() { - return customPrefixes; - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index 4bfcc8aba3af3..c1f6bd6f1fd1a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -18,11 +18,18 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.s3.AmazonS3; +import java.util.concurrent.atomic.AtomicLong; + +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; /** * S3 Client factory used for testing with eventual consistency fault injection. @@ -30,25 +37,74 @@ * {@code hadoop-aws} module to enable integration tests to use this * just by editing the Hadoop configuration used to bring up the client. * - * The factory uses the older constructor-based instantiation/configuration - * of the client, so does not wire up metrics, handlers etc. 
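// Editor's sketch (not part of the patch): in SDK v2, request interception
// replaces the v1 pattern of subclassing AmazonS3Client. A minimal interceptor
// and its registration, using only public SDK APIs, looks like:
//
//   ExecutionInterceptor probe = new ExecutionInterceptor() {
//     @Override
//     public void beforeExecution(Context.BeforeExecution context,
//         ExecutionAttributes executionAttributes) {
//       // invoked before every request the client issues
//     }
//   };
//   S3Client client = S3Client.builder()
//       .overrideConfiguration(ClientOverrideConfiguration.builder()
//           .addExecutionInterceptor(probe)
//           .build())
//       .build();
//
// The FailureInjectionInterceptor below follows this same shape.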
+ * The factory injects an {@link ExecutionInterceptor} to inject failures. */ @InterfaceAudience.Private @InterfaceStability.Unstable public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { @Override - protected AmazonS3 buildAmazonS3Client( - final ClientConfiguration awsConf, - final S3ClientCreationParameters parameters) { + protected ClientOverrideConfiguration createClientOverrideConfiguration( + S3ClientCreationParameters parameters, Configuration conf) { LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **"); LOG.warn("List inconsistency is no longer emulated; only throttling and read errors"); - InconsistentAmazonS3Client s3 - = new InconsistentAmazonS3Client( - parameters.getCredentialSet(), awsConf, getConf()); - configureAmazonS3Client(s3, - parameters.getEndpoint(), - parameters.isPathStyleAccess()); - return s3; + return super.createClientOverrideConfiguration(parameters, conf) + .toBuilder() + .addExecutionInterceptor(new FailureInjectionInterceptor( + new FailureInjectionPolicy(conf))) + .build(); + } + + private static class FailureInjectionInterceptor implements ExecutionInterceptor { + + private final FailureInjectionPolicy policy; + + /** + * Counter of failures since last reset. + */ + private final AtomicLong failureCounter = new AtomicLong(0); + + public FailureInjectionInterceptor(FailureInjectionPolicy policy) { + this.policy = policy; + } + + @Override + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + maybeFail(); + } + + private void maybeFail() { + maybeFail("throttled", 503); + } + + /** + * Conditionally fail the operation. + * @param errorMsg description of failure + * @param statusCode http status code for error + * @throws SdkException if the client chooses to fail + * the request. 
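// Editor's note: the key names below are assumptions, inferred from
// FailureInjectionPolicy and the client-factory switch the class javadoc
// refers to; a test would enable this factory along the lines of:
//
//   Configuration conf = new Configuration();
//   conf.set("fs.s3a.s3.client.factory.impl",
//       InconsistentS3ClientFactory.class.getName());
//   conf.setFloat("fs.s3a.failinject.throttle.probability", 0.2f);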
+ */ + private void maybeFail(String errorMsg, int statusCode) + throws SdkException { + // code structure here is to line up for more failures later + AwsServiceException ex = null; + if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) { + // throttle the request + ex = AwsServiceException.builder() + .message(errorMsg + " count = " + (failureCounter.get() + 1)) + .statusCode(statusCode) + .build(); + } + + int failureLimit = policy.getFailureLimit(); + if (ex != null) { + long count = failureCounter.incrementAndGet(); + if (failureLimit == 0 + || (failureLimit > 0 && count < failureLimit)) { + throw ex; + } + } + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 279bfeba98769..58e65530c235e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -24,8 +24,6 @@ import java.util.concurrent.Future; import javax.annotation.Nullable; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,6 +37,8 @@ import org.apache.hadoop.util.functional.InvocationRaisingIOE; import org.apache.hadoop.util.Preconditions; +import software.amazon.awssdk.core.exception.SdkException; + import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration; /** @@ -120,7 +120,7 @@ public static T once(String action, String path, throws IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) { return operation.apply(); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -145,7 +145,7 @@ public static T onceTrackingDuration( throws IOException { try { return invokeTrackingDuration(tracker, operation); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -170,7 +170,7 @@ public static void once(String action, String path, /** * - * Wait for a future, translating AmazonClientException into an IOException. + * Wait for a future, translating SdkException into an IOException. 
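// Editor's sketch (not part of the patch): because SdkException is the root of
// the v2 exception hierarchy, one catch clause now covers what v1 split across
// AmazonClientException and SdkBaseException. Typical use through Invoker.once,
// with placeholder bucket/key values:
//
//   HeadObjectResponse status = Invoker.once("HEAD", path.toString(), () ->
//       s3Client.headObject(HeadObjectRequest.builder()
//           .bucket("example-bucket")
//           .key("example-key")
//           .build()));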
* @param action action to execute (used in error messages) * @param path path of work (used in error messages) * @param future future to await for @@ -186,7 +186,7 @@ public static T onceInTheFuture(String action, throws IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) { return FutureIO.awaitFuture(future); - } catch (AmazonClientException e) { + } catch (SdkException e) { throw S3AUtils.translateException(action, path, e); } } @@ -466,7 +466,7 @@ public T retryUntranslated( } // execute the operation, returning if successful return operation.apply(); - } catch (IOException | SdkBaseException e) { + } catch (IOException | SdkException e) { caught = e; } // you only get here if the operation didn't complete @@ -478,7 +478,7 @@ public T retryUntranslated( translated = (IOException) caught; } else { translated = S3AUtils.translateException(text, "", - (SdkBaseException)caught); + (SdkException) caught); } try { @@ -517,11 +517,10 @@ public T retryUntranslated( if (caught instanceof IOException) { throw (IOException) caught; } else { - throw (SdkBaseException) caught; + throw (SdkException) caught; } } - /** * Execute an operation; any exception raised is simply caught and * logged at debug. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index 6c39cc4b64240..b4674159ea473 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.VisibleForTesting; @@ -39,6 +38,8 @@ import org.apache.hadoop.util.functional.RemoteIterators; import org.slf4j.Logger; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.S3Object; import java.io.Closeable; import java.io.IOException; @@ -277,19 +278,19 @@ public S3ListRequest createListObjectsRequest(String key, } /** - * Interface to implement by the logic deciding whether to accept a summary + * Interface to implement the logic deciding whether to accept a s3Object * entry or path as a valid file or directory. */ interface FileStatusAcceptor { /** - * Predicate to decide whether or not to accept a summary entry. + * Predicate to decide whether or not to accept a s3Object entry. * @param keyPath qualified path to the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ - boolean accept(Path keyPath, S3ObjectSummary summary); + boolean accept(Path keyPath, S3Object s3Object); /** * Predicate to decide whether or not to accept a prefix. @@ -451,21 +452,21 @@ private boolean buildNextStatusBatch(S3ListResult objects) { int added = 0, ignored = 0; // list to fill in with results. Initial size will be list maximum. 
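// Editor's aside (not part of the patch): the listing entry type changes from
// v1's S3ObjectSummary (bean getters) to v2's S3Object (fluent accessors):
//   v1: summary.getKey(), summary.getETag(), summary.getSize()
//   v2: s3Object.key(),   s3Object.eTag(),   s3Object.size()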
List stats = new ArrayList<>( - objects.getObjectSummaries().size() + + objects.getS3Objects().size() + objects.getCommonPrefixes().size()); // objects - for (S3ObjectSummary summary : objects.getObjectSummaries()) { - String key = summary.getKey(); + for (S3Object s3Object : objects.getS3Objects()) { + String key = s3Object.key(); Path keyPath = getStoreContext().getContextAccessors().keyToPath(key); if (LOG.isDebugEnabled()) { - LOG.debug("{}: {}", keyPath, stringify(summary)); + LOG.debug("{}: {}", keyPath, stringify(s3Object)); } // Skip over keys that are ourselves and old S3N _$folder$ files - if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) { - S3AFileStatus status = createFileStatus(keyPath, summary, + if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) { + S3AFileStatus status = createFileStatus(keyPath, s3Object, listingOperationCallbacks.getDefaultBlockSize(keyPath), getStoreContext().getUsername(), - summary.getETag(), null, isCSEEnabled); + s3Object.eTag(), null, isCSEEnabled); LOG.debug("Adding: {}", status); stats.add(status); added++; @@ -476,11 +477,11 @@ private boolean buildNextStatusBatch(S3ListResult objects) { } // prefixes: always directories - for (String prefix : objects.getCommonPrefixes()) { + for (CommonPrefix prefix : objects.getCommonPrefixes()) { Path keyPath = getStoreContext() .getContextAccessors() - .keyToPath(prefix); - if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) { + .keyToPath(prefix.prefix()); + if (acceptor.accept(keyPath, prefix.prefix()) && filter.accept(keyPath)) { S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath, getStoreContext().getUsername()); LOG.debug("Adding directory: {}", status); @@ -731,18 +732,18 @@ public AcceptFilesOnly(Path qualifiedPath) { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) - && !summary.getKey().endsWith(S3N_FOLDER_SUFFIX) - && !objectRepresentsDirectory(summary.getKey()); + && !s3Object.key().endsWith(S3N_FOLDER_SUFFIX) + && !objectRepresentsDirectory(s3Object.key()); } /** @@ -767,8 +768,8 @@ public boolean accept(FileStatus status) { */ static class AcceptAllButS3nDirs implements FileStatusAcceptor { - public boolean accept(Path keyPath, S3ObjectSummary summary) { - return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + public boolean accept(Path keyPath, S3Object s3Object) { + return !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } public boolean accept(Path keyPath, String prefix) { @@ -799,17 +800,17 @@ public AcceptAllButSelfAndS3nDirs(Path qualifiedPath) { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated.) 
*/ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) && - !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java similarity index 61% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java index 96e32f362dfd9..fdc60a638a77e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteSupport.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java @@ -16,32 +16,37 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3a.impl; +package org.apache.hadoop.fs.s3a; import java.io.IOException; import java.nio.file.AccessDeniedException; import java.util.List; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.s3a.AWSS3IOException; +import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_200_OK; /** - * Support for Multi Object Deletion. - * This is used to be a complex piece of code as it was required to - * update s3guard. - * Now all that is left is the exception extraction for better - * reporting, + * Exception raised in {@link S3AFileSystem#deleteObjects} when + * one or more of the keys could not be deleted. + * + * Used to reproduce the behaviour of SDK v1 for partial failures + * on DeleteObjects. In SDK v2, the errors are returned as part of + * the response objects. */ -public final class MultiObjectDeleteSupport { +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class MultiObjectDeleteException extends S3Exception { private static final Logger LOG = LoggerFactory.getLogger( - MultiObjectDeleteSupport.class); - - private MultiObjectDeleteSupport() { - } + MultiObjectDeleteException.class); /** * This is the exception exit code if access was denied on a delete. @@ -49,6 +54,15 @@ private MultiObjectDeleteSupport() { */ public static final String ACCESS_DENIED = "AccessDenied"; + private final List errors; + + public MultiObjectDeleteException(List errors) { + super(builder().message(errors.toString()).statusCode(SC_200_OK)); + this.errors = errors; + } + + public List errors() { return errors; } + /** * A {@code MultiObjectDeleteException} is raised if one or more * paths listed in a bulk DELETE operation failed. @@ -58,29 +72,23 @@ private MultiObjectDeleteSupport() { * the causes, otherwise grabs the status code and uses it in the * returned exception. * @param message text for the exception - * @param deleteException the delete exception. to translate * @return an IOE with more detail. 
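// Editor's sketch (an assumption about the calling convention elsewhere in
// this patch): v2's DeleteObjects reports per-key failures in the response
// instead of throwing, so the caller inspects the response and raises this
// exception itself:
//
//   DeleteObjectsResponse response = s3Client.deleteObjects(request);
//   if (!response.errors().isEmpty()) {
//     throw new MultiObjectDeleteException(response.errors());
//   }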
*/ - public static IOException translateDeleteException( - final String message, - final MultiObjectDeleteException deleteException) { - List errors - = deleteException.getErrors(); + public IOException translateException(final String message) { LOG.info("Bulk delete operation failed to delete all objects;" + " failure count = {}", - errors.size()); + errors().size()); final StringBuilder result = new StringBuilder( - errors.size() * 256); + errors().size() * 256); result.append(message).append(": "); String exitCode = ""; - for (MultiObjectDeleteException.DeleteError error : - deleteException.getErrors()) { - String code = error.getCode(); - String item = String.format("%s: %s%s: %s%n", code, error.getKey(), - (error.getVersionId() != null - ? (" (" + error.getVersionId() + ")") + for (S3Error error : errors()) { + String code = error.code(); + String item = String.format("%s: %s%s: %s%n", code, error.key(), + (error.versionId() != null + ? (" (" + error.versionId() + ")") : ""), - error.getMessage()); + error.message()); LOG.info(item); result.append(item); if (exitCode == null || exitCode.isEmpty() || ACCESS_DENIED.equals(code)) { @@ -89,9 +97,9 @@ public static IOException translateDeleteException( } if (ACCESS_DENIED.equals(exitCode)) { return (IOException) new AccessDeniedException(result.toString()) - .initCause(deleteException); + .initCause(this); } else { - return new AWSS3IOException(result.toString(), deleteException); + return new AWSS3IOException(result.toString(), this); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java index d8c820cd8a121..c471e052d4f08 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java @@ -23,13 +23,14 @@ import java.util.NoSuchElementException; import javax.annotation.Nullable; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.MultipartUploadListing; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; + import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.StoreContext; @@ -43,7 +44,7 @@ * MultipartUtils upload-specific functions for use by S3AFileSystem and Hadoop * CLI. * The Audit span active when - * {@link #listMultipartUploads(StoreContext, AmazonS3, String, int)} + * {@link #listMultipartUploads(StoreContext, S3Client, String, int)} * was invoked is retained for all subsequent operations. */ public final class MultipartUtils { @@ -67,7 +68,7 @@ private MultipartUtils() { } */ static MultipartUtils.UploadIterator listMultipartUploads( final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, @Nullable String prefix, int maxKeys) throws IOException { @@ -84,14 +85,14 @@ static MultipartUtils.UploadIterator listMultipartUploads( * at the time the iterator was constructed. 
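// Editor's sketch (not part of the patch; storeContext and s3 are assumed to
// be in scope): walking all incomplete uploads under a prefix with these
// helpers:
//
//   RemoteIterator<MultipartUpload> uploads =
//       MultipartUtils.listMultipartUploads(storeContext, s3, "logs/", 100);
//   while (uploads.hasNext()) {
//     MultipartUpload upload = uploads.next();
//     LOG.info("incomplete upload of {} started at {}",
//         upload.key(), upload.initiated());
//   }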
*/ static class ListingIterator implements - RemoteIterator { + RemoteIterator { private final String prefix; private final RequestFactory requestFactory; private final int maxKeys; - private final AmazonS3 s3; + private final S3Client s3; private final Invoker invoker; private final AuditSpan auditSpan; @@ -101,7 +102,7 @@ static class ListingIterator implements /** * Most recent listing results. */ - private MultipartUploadListing listing; + private ListMultipartUploadsResponse listing; /** * Indicator that this is the first listing. @@ -114,7 +115,7 @@ static class ListingIterator implements private int listCount = 0; ListingIterator(final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, @Nullable String prefix, int maxKeys) throws IOException { this.storeContext = storeContext; @@ -153,7 +154,7 @@ public boolean hasNext() throws IOException { */ @Override @Retries.RetryTranslated - public MultipartUploadListing next() throws IOException { + public ListMultipartUploadsResponse next() throws IOException { if (firstListing) { firstListing = false; } else { @@ -171,32 +172,34 @@ public MultipartUploadListing next() throws IOException { public String toString() { return "Upload iterator: prefix " + prefix + "; list count " + listCount - + "; upload count " + listing.getMultipartUploads().size() + + "; upload count " + listing.uploads().size() + "; isTruncated=" + listing.isTruncated(); } @Retries.RetryTranslated private void requestNextBatch() throws IOException { try (AuditSpan span = auditSpan.activate()) { - ListMultipartUploadsRequest req = requestFactory - .newListMultipartUploadsRequest(prefix); + ListMultipartUploadsRequest.Builder requestBuilder = requestFactory + .newListMultipartUploadsRequestBuilder(prefix); if (!firstListing) { - req.setKeyMarker(listing.getNextKeyMarker()); - req.setUploadIdMarker(listing.getNextUploadIdMarker()); + requestBuilder.keyMarker(listing.nextKeyMarker()); + requestBuilder.uploadIdMarker(listing.nextUploadIdMarker()); } - req.setMaxUploads(maxKeys); + requestBuilder.maxUploads(maxKeys); + + ListMultipartUploadsRequest request = requestBuilder.build(); LOG.debug("[{}], Requesting next {} uploads prefix {}, " + "next key {}, next upload id {}", listCount, maxKeys, prefix, - req.getKeyMarker(), req.getUploadIdMarker()); + request.keyMarker(), request.uploadIdMarker()); listCount++; listing = invoker.retry("listMultipartUploads", prefix, true, trackDurationOfOperation(storeContext.getInstrumentation(), MULTIPART_UPLOAD_LIST.getSymbol(), - () -> s3.listMultipartUploads(req))); + () -> s3.listMultipartUploads(requestBuilder.build()))); LOG.debug("Listing found {} upload(s)", - listing.getMultipartUploads().size()); + listing.uploads().size()); LOG.debug("New listing state: {}", this); } } @@ -216,14 +219,14 @@ public static class UploadIterator */ private ListingIterator lister; /** Current listing: the last upload listing we fetched. */ - private MultipartUploadListing listing; + private ListMultipartUploadsResponse listing; /** Iterator over the current listing. 
*/ private ListIterator batchIterator; @Retries.RetryTranslated public UploadIterator( final StoreContext storeContext, - AmazonS3 s3, + S3Client s3, int maxKeys, @Nullable String prefix) throws IOException { @@ -249,7 +252,7 @@ public MultipartUpload next() throws IOException { private boolean requestNextBatch() throws IOException { if (lister.hasNext()) { listing = lister.next(); - batchIterator = listing.getMultipartUploads().listIterator(); + batchIterator = listing.uploads().listIterator(); return batchIterator.hasNext(); } return false; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java index 0ce022aa88588..8ef7bc4b36c9c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java @@ -18,59 +18,56 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; -import com.amazonaws.services.s3.transfer.Upload; -import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; -import static com.amazonaws.event.ProgressEventType.TRANSFER_COMPLETED_EVENT; -import static com.amazonaws.event.ProgressEventType.TRANSFER_PART_STARTED_EVENT; +import software.amazon.awssdk.transfer.s3.ObjectTransfer; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + +import org.apache.hadoop.util.Progressable; /** * Listener to progress from AWS regarding transfers. */ -public class ProgressableProgressListener implements ProgressListener { +public class ProgressableProgressListener implements TransferListener { private static final Logger LOG = S3AFileSystem.LOG; private final S3AFileSystem fs; private final String key; private final Progressable progress; private long lastBytesTransferred; - private final Upload upload; /** * Instantiate. * @param fs filesystem: will be invoked with statistics updates * @param key key for the upload - * @param upload source of events * @param progress optional callback for progress. 
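// Editor's sketch; the attachment point is an assumption, as the transfer
// manager was still a developer preview when this patch was written and its
// API has moved between releases. Listeners are registered on the transfer
// request rather than on the underlying PutObjectRequest, e.g.:
//
//   FileUpload upload = transferManager.uploadFile(UploadFileRequest.builder()
//       .putObjectRequest(b -> b.bucket("example-bucket").key(key))
//       .source(srcFile.toPath())
//       .overrideConfiguration(o -> o.addListener(
//           new ProgressableProgressListener(fs, key, progress)))
//       .build());
//   upload.completionFuture().join();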
*/ public ProgressableProgressListener(S3AFileSystem fs, String key, - Upload upload, Progressable progress) { this.fs = fs; this.key = key; - this.upload = upload; this.progress = progress; this.lastBytesTransferred = 0; } @Override - public void progressChanged(ProgressEvent progressEvent) { - if (progress != null) { - progress.progress(); - } + public void transferInitiated(TransferListener.Context.TransferInitiated context) { + fs.incrementWriteOperations(); + } - // There are 3 http ops here, but this should be close enough for now - ProgressEventType pet = progressEvent.getEventType(); - if (pet == TRANSFER_PART_STARTED_EVENT || - pet == TRANSFER_COMPLETED_EVENT) { - fs.incrementWriteOperations(); + @Override + public void transferComplete(TransferListener.Context.TransferComplete context) { + fs.incrementWriteOperations(); + } + + @Override + public void bytesTransferred(TransferListener.Context.BytesTransferred context) { + + if(progress != null) { + progress.progress(); } - long transferred = upload.getProgress().getBytesTransferred(); + long transferred = context.progressSnapshot().bytesTransferred(); long delta = transferred - lastBytesTransferred; fs.incrementPutProgressStatistics(key, delta); lastBytesTransferred = transferred; @@ -81,9 +78,10 @@ public void progressChanged(ProgressEvent progressEvent) { * This can handle race conditions in setup/teardown. * @return the number of bytes which were transferred after the notification */ - public long uploadCompleted() { - long delta = upload.getProgress().getBytesTransferred() - - lastBytesTransferred; + public long uploadCompleted(ObjectTransfer upload) { + + long delta = + upload.progress().snapshot().bytesTransferred() - lastBytesTransferred; if (delta > 0) { LOG.debug("S3A write delta changed after finished: {} bytes", delta); fs.incrementPutProgressStatistics(key, delta); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index 2febc87aec36d..4b450b7ee9ff0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -31,14 +31,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.SdkBaseException; import com.amazonaws.event.ProgressEvent; import com.amazonaws.event.ProgressEventType; import com.amazonaws.event.ProgressListener; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.UploadPartRequest; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; @@ -50,6 +45,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Abortable; @@ -441,7 +444,7 @@ public void close() throws IOException { uploadCurrentBlock(true); } // wait for the partial uploads to finish - final List partETags = + final List partETags = multiPartUpload.waitForAllPartUploads(); bytes = bytesSubmitted; @@ -599,27 +602,30 @@ private long putObject() throws IOException { final PutObjectRequest putObjectRequest = uploadData.hasFile() ? writeOperationHelper.createPutObjectRequest( key, - uploadData.getFile(), - builder.putOptions) + uploadData.getFile().length(), + builder.putOptions, + true) : writeOperationHelper.createPutObjectRequest( key, - uploadData.getUploadStream(), size, - builder.putOptions); - BlockUploadProgress callback = - new BlockUploadProgress( - block, progressListener, now()); - putObjectRequest.setGeneralProgressListener(callback); + builder.putOptions, + false); + + // TODO: You cannot currently add progress listeners to requests not via the TM. + // There is an open ticket for this with the SDK team. But need to check how important + // this is for us? + // BlockUploadProgress callback = + // new BlockUploadProgress( + // block, progressListener, now()); + // putObjectRequest.setGeneralProgressListener(callback); statistics.blockUploadQueued(size); - ListenableFuture putObjectResult = + ListenableFuture putObjectResult = executorService.submit(() -> { try { // the putObject call automatically closes the input // stream afterwards. - return writeOperationHelper.putObject( - putObjectRequest, - builder.putOptions, - statistics); + return writeOperationHelper.putObject(putObjectRequest, builder.putOptions, uploadData, + uploadData.hasFile(), statistics); } finally { cleanupWithLogger(LOG, uploadData, block); } @@ -770,7 +776,7 @@ protected IOStatisticsAggregator getThreadIOStatistics() { */ private class MultiPartUpload { private final String uploadId; - private final List> partETagsFutures; + private final List> partETagsFutures; private int partsSubmitted; private int partsUploaded; private long bytesSubmitted; @@ -875,18 +881,19 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, final int currentPartNumber = partETagsFutures.size() + 1; final UploadPartRequest request; final S3ADataBlocks.BlockUploadData uploadData; + final RequestBody requestBody; try { uploadData = block.startUpload(); - request = writeOperationHelper.newUploadPartRequest( + requestBody = uploadData.hasFile() + ? RequestBody.fromFile(uploadData.getFile()) + : RequestBody.fromInputStream(uploadData.getUploadStream(), size); + + request = writeOperationHelper.newUploadPartRequestBuilder( key, uploadId, currentPartNumber, - size, - uploadData.getUploadStream(), - uploadData.getFile(), - 0L); - request.setLastPart(isLast); - } catch (SdkBaseException aws) { + size).build(); + } catch (SdkException aws) { // catch and translate IOException e = translateException("upload", key, aws); // failure to start the upload. @@ -897,25 +904,32 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, noteUploadFailure(e); throw e; } - BlockUploadProgress callback = - new BlockUploadProgress( - block, progressListener, now()); - request.setGeneralProgressListener(callback); + + // TODO: You cannot currently add progress listeners to requests not via the TM. 
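// Editor's aside (assumption, not part of the patch): until the SDK restores
// per-request progress events, byte-level progress for stream-backed uploads
// can be approximated by wrapping the stream handed to RequestBody:
//
//   AtomicLong bytesSent = new AtomicLong();
//   InputStream counted = new FilterInputStream(uploadData.getUploadStream()) {
//     @Override
//     public int read(byte[] b, int off, int len) throws IOException {
//       int n = super.read(b, off, len);
//       if (n > 0) {
//         bytesSent.addAndGet(n);
//       }
//       return n;
//     }
//   };
//   RequestBody body = RequestBody.fromInputStream(counted, size);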
+ // See also putObject + // BlockUploadProgress callback = + // new BlockUploadProgress( + // block, progressListener, now()); + // request.setGeneralProgressListener(callback); + statistics.blockUploadQueued(block.dataSize()); - ListenableFuture partETagFuture = + ListenableFuture partETagFuture = executorService.submit(() -> { // this is the queued upload operation // do the upload try { LOG.debug("Uploading part {} for id '{}'", currentPartNumber, uploadId); - PartETag partETag = writeOperationHelper.uploadPart(request, statistics) - .getPartETag(); + UploadPartResponse response = writeOperationHelper + .uploadPart(request, requestBody, statistics); LOG.debug("Completed upload of {} to part {}", - block, partETag.getETag()); + block, response.eTag()); LOG.debug("Stream statistics of {}", statistics); partsUploaded++; - return partETag; + return CompletedPart.builder() + .eTag(response.eTag()) + .partNumber(currentPartNumber) + .build(); } catch (IOException e) { // save immediately. noteUploadFailure(e); @@ -933,7 +947,7 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, * @return list of results * @throws IOException IO Problems */ - private List waitForAllPartUploads() throws IOException { + private List waitForAllPartUploads() throws IOException { LOG.debug("Waiting for {} uploads to complete", partETagsFutures.size()); try { return Futures.allAsList(partETagsFutures).get(); @@ -957,7 +971,7 @@ private List waitForAllPartUploads() throws IOException { */ private void cancelAllActiveFutures() { LOG.debug("Cancelling futures"); - for (ListenableFuture future : partETagsFutures) { + for (ListenableFuture future : partETagsFutures) { future.cancel(true); } } @@ -969,7 +983,7 @@ private void cancelAllActiveFutures() { * @param partETags list of partial uploads * @throws IOException on any problem */ - private void complete(List partETags) + private void complete(List partETags) throws IOException { maybeRethrowUploadFailure(); AtomicInteger errorCount = new AtomicInteger(0); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java index 2299892b35b94..de9d1ddca1666 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java @@ -47,7 +47,7 @@ * Set of classes to support output streaming into blocks which are then * uploaded as to S3 as a single PUT, or as part of a multipart request. */ -final class S3ADataBlocks { +public final class S3ADataBlocks { private static final Logger LOG = LoggerFactory.getLogger(S3ADataBlocks.class); @@ -101,7 +101,7 @@ static BlockFactory createFactory(S3AFileSystem owner, * It can be one of a file or an input stream. * When closed, any stream is closed. Any source file is untouched. */ - static final class BlockUploadData implements Closeable { +public static final class BlockUploadData implements Closeable { private final File file; private final InputStream uploadStream; @@ -109,7 +109,7 @@ static final class BlockUploadData implements Closeable { * File constructor; input stream will be null. 
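// Editor's sketch (an assumption about the body of complete() above): in v2
// the collected CompletedPart entries are attached to the request through a
// CompletedMultipartUpload:
//
//   CompleteMultipartUploadResponse result = s3Client.completeMultipartUpload(
//       CompleteMultipartUploadRequest.builder()
//           .bucket("example-bucket")
//           .key(key)
//           .uploadId(uploadId)
//           .multipartUpload(CompletedMultipartUpload.builder()
//               .parts(partETags)   // List<CompletedPart>, in part-number order
//               .build())
//           .build());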
* @param file file to upload */ - BlockUploadData(File file) { + public BlockUploadData(File file) { Preconditions.checkArgument(file.exists(), "No file: " + file); this.file = file; this.uploadStream = null; @@ -119,7 +119,7 @@ static final class BlockUploadData implements Closeable { * Stream constructor, file field will be null. * @param uploadStream stream to upload */ - BlockUploadData(InputStream uploadStream) { + public BlockUploadData(InputStream uploadStream) { Preconditions.checkNotNull(uploadStream, "rawUploadStream"); this.uploadStream = uploadStream; this.file = null; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 999186f8cd5ae..d5fd4f154f68c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -21,12 +21,14 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InterruptedIOException; import java.io.UncheckedIOException; import java.net.URI; import java.nio.file.AccessDeniedException; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -41,6 +43,7 @@ import java.util.Objects; import java.util.TreeSet; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -48,47 +51,57 @@ import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsResult; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; -import 
com.amazonaws.services.s3.model.UploadPartResult; -import com.amazonaws.services.s3.transfer.Copy; -import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.TransferManagerConfiguration; -import com.amazonaws.services.s3.transfer.Upload; -import com.amazonaws.services.s3.transfer.model.CopyResult; -import com.amazonaws.services.s3.transfer.model.UploadResult; -import com.amazonaws.event.ProgressListener; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.GetBucketAclRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.NoSuchBucketException; +import software.amazon.awssdk.services.s3.model.ObjectCannedACL; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; +import software.amazon.awssdk.services.s3.model.StorageClass; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; +import software.amazon.awssdk.transfer.s3.CompletedCopy; +import software.amazon.awssdk.transfer.s3.CompletedFileUpload; +import software.amazon.awssdk.transfer.s3.Copy; +import software.amazon.awssdk.transfer.s3.CopyRequest; +import software.amazon.awssdk.transfer.s3.FileUpload; +import software.amazon.awssdk.transfer.s3.S3TransferManager; +import software.amazon.awssdk.transfer.s3.UploadFileRequest; + import 
org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -112,7 +125,6 @@ import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; -import org.apache.hadoop.fs.s3a.impl.CopyOutcome; import org.apache.hadoop.fs.s3a.impl.CreateFileBuilder; import org.apache.hadoop.fs.s3a.impl.DeleteOperation; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; @@ -230,8 +242,8 @@ import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403_FORBIDDEN; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup; @@ -275,7 +287,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private URI uri; private Path workingDir; private String username; - private AmazonS3 s3; + private S3Client s3Client; + private S3AsyncClient s3AsyncClient; // initial callback policy is fail-once; it's there just to assist // some mock tests and other codepaths trying to call the low level // APIs on an uninitialized filesystem. @@ -294,7 +307,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private Listing listing; private long partSize; private boolean enableMultiObjectsDelete; - private TransferManager transfers; + private S3TransferManager transferManager; private ExecutorService boundedThreadPool; private ThreadPoolExecutor unboundedThreadPool; @@ -316,7 +329,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private static final Logger PROGRESS = LoggerFactory.getLogger("org.apache.hadoop.fs.s3a.S3AFileSystem.Progress"); private LocalDirAllocator directoryAllocator; - private CannedAccessControlList cannedACL; + private ObjectCannedACL cannedACL; /** * This must never be null; until initialized it just declares that there @@ -583,9 +596,6 @@ public void initialize(URI name, Configuration originalConf) // the encryption algorithms) bindAWSClient(name, delegationTokensEnabled); - initTransferManager(); - - // This initiates a probe against S3 for the bucket existing. doBucketProbing(); @@ -653,7 +663,7 @@ public void initialize(URI name, Configuration originalConf) AWS_S3_VECTOR_ACTIVE_RANGE_READS, DEFAULT_AWS_S3_VECTOR_ACTIVE_RANGE_READS, 1); vectoredIOContext = populateVectoredIOContext(conf); scheme = (this.uri != null && this.uri.getScheme() != null) ? this.uri.getScheme() : FS_S3A; - } catch (AmazonClientException e) { + } catch (SdkException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); stopAllServices(); @@ -826,8 +836,8 @@ protected static S3AStorageStatistics createStorageStatistics( } /** - * Verify that the bucket exists. This does not check permissions, - * not even read access. 
+ * Verify that the bucket exists. + * TODO: Review: this used to call doesBucketExist in v1, which does not check permissions, not even read access. * Retry policy: retrying, translated. * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 @@ -838,7 +848,16 @@ protected void verifyBucketExists() if (!invoker.retry("doesBucketExist", bucket, true, trackDurationOfOperation(getDurationTrackerFactory(), STORE_EXISTS_PROBE.getSymbol(), - () -> s3.doesBucketExist(bucket)))) { + () -> { + try { + s3Client.headBucket(HeadBucketRequest.builder() + .bucket(bucket) + .build()); + return true; + } catch (NoSuchBucketException e) { + return false; + } + }))) { throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist"); } @@ -847,6 +866,7 @@ protected void verifyBucketExists() /** * Verify that the bucket exists. This will correctly throw an exception * when credentials are invalid. + * TODO: Review. May be redundant in v2. * Retry policy: retrying, translated. * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 @@ -861,11 +881,14 @@ protected void verifyBucketExistsV2() // Bug in SDK always returns `true` for AccessPoint ARNs with `doesBucketExistV2()` // expanding implementation to use ARNs and buckets correctly try { - s3.getBucketAcl(bucket); - } catch (AmazonServiceException ex) { - int statusCode = ex.getStatusCode(); - if (statusCode == SC_404 || - (statusCode == SC_403 && ex.getMessage().contains(AP_INACCESSIBLE))) { + s3Client.getBucketAcl(GetBucketAclRequest.builder() + .bucket(bucket) + .build()); + } catch (AwsServiceException ex) { + int statusCode = ex.statusCode(); + if (statusCode == SC_404_NOT_FOUND || + (statusCode == SC_403_FORBIDDEN && + ex.getMessage().contains(AP_INACCESSIBLE))) { return false; } } @@ -914,7 +937,6 @@ public Listing getListing() { * @param dtEnabled are delegation tokens enabled? * @throws IOException failure. */ - @SuppressWarnings("deprecation") private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { Configuration conf = getConf(); credentials = null; @@ -965,8 +987,8 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { ? 
conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT) : accessPoint.getEndpoint(); - S3ClientFactory.S3ClientCreationParameters parameters = null; - parameters = new S3ClientFactory.S3ClientCreationParameters() + S3ClientFactory.S3ClientCreationParameters parameters = + new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(credentials) .withPathUri(name) .withEndpoint(endpoint) @@ -974,11 +996,17 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false)) .withUserAgentSuffix(uaSuffix) .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) - .withRequestHandlers(auditManager.createRequestHandlers()); + .withExecutionInterceptors(auditManager.createExecutionInterceptors()); - s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) - .createS3Client(getUri(), + s3Client = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) + .createS3ClientV2(getUri(), parameters); + + s3AsyncClient = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) + .createS3AsyncClient(getUri(), + parameters); + + initTransferManager(); } /** @@ -1078,12 +1106,14 @@ protected RequestFactory createRequestFactory() { .toUpperCase(Locale.US); StorageClass storageClass = null; if (!storageClassConf.isEmpty()) { - try { storageClass = StorageClass.fromValue(storageClassConf); - } catch (IllegalArgumentException e) { - LOG.warn("Unknown storage class property {}: {}; falling back to default storage class", - STORAGE_CLASS, storageClassConf); - } + + if (storageClass.equals(StorageClass.UNKNOWN_TO_SDK_VERSION)) { + LOG.warn("Unknown storage class property {}: {}; falling back to default storage class", + STORAGE_CLASS, storageClassConf); + storageClass = null; + } + } else { LOG.debug("Unset storage class property {}; falling back to default storage class", STORAGE_CLASS); @@ -1151,21 +1181,22 @@ public EncryptionSecrets getEncryptionSecrets() { } private void initTransferManager() { - TransferManagerConfiguration transferConfiguration = - new TransferManagerConfiguration(); - transferConfiguration.setMinimumUploadPartSize(partSize); - transferConfiguration.setMultipartUploadThreshold(multiPartThreshold); - transferConfiguration.setMultipartCopyPartSize(partSize); - transferConfiguration.setMultipartCopyThreshold(multiPartThreshold); - - transfers = new TransferManager(s3, unboundedThreadPool); - transfers.setConfiguration(transferConfiguration); + // TODO: move to client factory? + transferManager = S3TransferManager.builder() + .s3ClientConfiguration(clientConfiguration -> + // TODO: other configuration options? 
+ clientConfiguration + .minimumPartSizeInBytes(partSize) + .credentialsProvider(credentials)) + .transferConfiguration(transferConfiguration -> + transferConfiguration.executor(unboundedThreadPool)) // TODO: double-check + .build(); } private void initCannedAcls(Configuration conf) { String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL); if (!cannedACLName.isEmpty()) { - cannedACL = CannedAccessControlList.valueOf(cannedACLName); + cannedACL = ObjectCannedACL.valueOf(AWSCannedACL.valueOf(cannedACLName).toString()); } else { cannedACL = null; } @@ -1198,12 +1229,22 @@ private void initMultipartUploads(Configuration conf) throws IOException { public void abortOutstandingMultipartUploads(long seconds) throws IOException { Preconditions.checkArgument(seconds >= 0); - Date purgeBefore = - new Date(new Date().getTime() - seconds * 1000); + Instant purgeBefore = + Instant.now().minusSeconds(seconds); LOG.debug("Purging outstanding multipart uploads older than {}", purgeBefore); invoker.retry("Purging multipart uploads", bucket, true, - () -> transfers.abortMultipartUploads(bucket, purgeBefore)); + () -> { + MultipartUtils.UploadIterator uploadIterator = + MultipartUtils.listMultipartUploads(createStoreContext(), s3Client, null, maxKeys); + + while (uploadIterator.hasNext()) { + MultipartUpload upload = uploadIterator.next(); + if (upload.initiated().compareTo(purgeBefore) < 0) { + abortMultipartUpload(upload); + } + } + }); } /** @@ -1251,44 +1292,27 @@ public int getDefaultPort() { return 0; } - /** - * Returns the S3 client used by this filesystem. - * This is for internal use within the S3A code itself. - * @return AmazonS3Client - */ - private AmazonS3 getAmazonS3Client() { - return s3; - } - /** * Returns the S3 client used by this filesystem. * Warning: this must only be used for testing, as it bypasses core * S3A operations. * @param reason a justification for requesting access. - * @return AmazonS3Client + * @return S3Client */ @VisibleForTesting - public AmazonS3 getAmazonS3ClientForTesting(String reason) { - LOG.warn("Access to S3A client requested, reason {}", reason); - V2Migration.v1S3ClientRequested(); - return s3; + public S3Client getAmazonS3V2ClientForTesting(String reason) { + LOG.warn("Access to S3 client requested, reason {}", reason); + return s3Client; } /** * Set the client -used in mocking tests to force in a different client. * @param client client. */ - protected void setAmazonS3Client(AmazonS3 client) { - Preconditions.checkNotNull(client, "client"); - LOG.debug("Setting S3 client to {}", client); - s3 = client; - - // Need to use a new TransferManager that uses the new client. - // Also, using a new TransferManager requires a new threadpool as the old - // TransferManager will shut the thread pool down when it is garbage - // collected. - initThreadPools(getConf()); - initTransferManager(); + protected void setAmazonS3Client(S3Client client) { + Preconditions.checkNotNull(client, "clientV2"); + LOG.debug("Setting S3V2 client to {}", client); + s3Client = client; } /** @@ -1307,6 +1331,7 @@ public String getBucketLocation() throws IOException { /** * Get the region of a bucket; fixing up the region so it can be used * in the builders of other AWS clients. + * TODO: Review. Used only for S3Guard? * Requires the caller to have the AWS role permission * {@code s3:GetBucketLocation}. * Retry policy: retrying, translated. 
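The v1 SDK's doesBucketExist() probe and TransferManager.abortMultipartUploads(bucket, date) have no direct v2 equivalents, which is why the hunks above rebuild them from headBucket() and an explicit list-and-abort loop. A minimal standalone sketch of both idioms, assuming an already-constructed S3Client and an illustrative bucket name; pagination of truncated listings and the S3A invoker/retry wiring are omitted here:

import java.time.Instant;

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest;
import software.amazon.awssdk.services.s3.model.MultipartUpload;
import software.amazon.awssdk.services.s3.model.NoSuchBucketException;

public final class V2ProbeAndPurgeSketch {

  /** v2 has no doesBucketExist(); a HEAD on the bucket is the closest probe. */
  static boolean bucketExists(S3Client s3, String bucket) {
    try {
      s3.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
      return true;
    } catch (NoSuchBucketException e) {
      return false;
    }
  }

  /** Abort every multipart upload initiated more than the given age ago. */
  static void purgeUploadsOlderThan(S3Client s3, String bucket, long seconds) {
    Instant purgeBefore = Instant.now().minusSeconds(seconds);
    // Single page only; a full implementation would loop while isTruncated().
    for (MultipartUpload upload : s3.listMultipartUploads(
        ListMultipartUploadsRequest.builder().bucket(bucket).build()).uploads()) {
      if (upload.initiated().isBefore(purgeBefore)) {
        s3.abortMultipartUpload(AbortMultipartUploadRequest.builder()
            .bucket(bucket)
            .key(upload.key())
            .uploadId(upload.uploadId())
            .build());
      }
    }
  }
}

Note that verifyBucketExistsV2 keeps its getBucketAcl() call rather than switching to a HEAD, since a plain HEAD cannot distinguish an inaccessible access point (the AP_INACCESSIBLE 403 case) from a missing bucket.
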
@@ -1325,7 +1350,10 @@ public String getBucketLocation(String bucketName) throws IOException { // If accessPoint then region is known from Arn accessPoint != null ? accessPoint.getRegion() - : s3.getBucketLocation(bucketName))); + : s3Client.getBucketLocation(GetBucketLocationRequest.builder() + .bucket(bucketName) + .build()) + .locationConstraintAsString())); return fixBucketRegion(region); } @@ -1413,7 +1441,7 @@ protected void setBucket(String bucket) { * Get the canned ACL of this FS. * @return an ACL, if any */ - CannedAccessControlList getCannedACL() { + ObjectCannedACL getCannedACL() { return cannedACL; } @@ -1644,18 +1672,18 @@ public void close() { } @Override - public GetObjectRequest newGetRequest(final String key) { + public GetObjectRequest.Builder newGetRequestBuilder(final String key) { // active the audit span used for the operation try (AuditSpan span = auditSpan.activate()) { - return getRequestFactory().newGetObjectRequest(key); + return getRequestFactory().newGetObjectRequestBuilder(key); } } @Override - public S3Object getObject(GetObjectRequest request) { + public ResponseInputStream getObject(GetObjectRequest request) { // active the audit span used for the operation try (AuditSpan span = auditSpan.activate()) { - return s3.getObject(request); + return s3Client.getObject(request); } } @@ -1682,18 +1710,19 @@ private final class WriteOperationHelperCallbacksImpl implements WriteOperationHelper.WriteOperationHelperCallbacks { @Override - public SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request) { - return s3.selectObjectContent(request); + public CompletableFuture selectObjectContent( + SelectObjectContentRequest request, + SelectObjectContentResponseHandler responseHandler) { + return s3AsyncClient.selectObjectContent(request, responseHandler); } @Override - public CompleteMultipartUploadResult completeMultipartUpload( + public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { - return s3.completeMultipartUpload(request); + return s3Client.completeMultipartUpload(request); } } - /** * Create the read context for reading from the referenced file, * using FS state as well as the status. @@ -2058,7 +2087,7 @@ public boolean rename(Path src, Path dst) throws IOException { innerRename(src, dst)); LOG.debug("Copied {} bytes", bytesCopied); return true; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("rename(" + src +", " + dst + ")", src, e); } catch (RenameFailedException e) { LOG.info("{}", e.getMessage()); @@ -2169,7 +2198,7 @@ private Pair initiateRename( * This operation throws an exception on any failure which needs to be * reported and downgraded to a failure. * Retries: retry translated, assuming all operations it is called do - * so. For safely, consider catch and handle AmazonClientException + * so. For safely, consider catch and handle SdkException * because this is such a complex method there's a risk it could surface. * @param source path to be renamed * @param dest new path after rename @@ -2180,12 +2209,12 @@ private Pair initiateRename( * @return the number of bytes copied. * @throws FileNotFoundException there's no source file. * @throws IOException on IO failure. 
- * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK */ @Retries.RetryMixed private long innerRename(Path source, Path dest) throws RenameFailedException, FileNotFoundException, IOException, - AmazonClientException { + SdkException { Path src = qualify(source); Path dst = qualify(dest); @@ -2280,7 +2309,7 @@ public RemoteIterator listFilesAndDirectoryMarkers( } @Override - public CopyResult copyFile(final String srcKey, + public CopyObjectResponse copyFile(final String srcKey, final String destKey, final S3ObjectAttributes srcAttributes, final S3AReadOpContext readContext) throws IOException { @@ -2291,9 +2320,9 @@ public CopyResult copyFile(final String srcKey, @Override public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, SdkException, IOException { auditSpan.activate(); S3AFileSystem.this.removeKeys(keysToDelete, deleteFakeDir); } @@ -2402,7 +2431,7 @@ public int getMaxKeys() { @InterfaceAudience.LimitedPrivate("utilities") @Retries.RetryTranslated @InterfaceStability.Evolving - public ObjectMetadata getObjectMetadata(Path path) throws IOException { + public HeadObjectResponse getObjectMetadata(Path path) throws IOException { V2Migration.v1GetObjectMetadataCalled(); return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> getObjectMetadata(makeQualified(path), null, invoker, @@ -2419,7 +2448,7 @@ public ObjectMetadata getObjectMetadata(Path path) throws IOException { * @throws IOException IO and object access problems. */ @Retries.RetryTranslated - private ObjectMetadata getObjectMetadata(Path path, + private HeadObjectResponse getObjectMetadata(Path path, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { String key = pathToKey(path); @@ -2632,7 +2661,7 @@ protected DurationTrackerFactory nonNullDurationTrackerFactory( @Retries.RetryRaw @VisibleForTesting @InterfaceAudience.LimitedPrivate("external utilities") - ObjectMetadata getObjectMetadata(String key) throws IOException { + HeadObjectResponse getObjectMetadata(String key) throws IOException { return getObjectMetadata(key, null, invoker, "getObjectMetadata"); } @@ -2649,28 +2678,28 @@ ObjectMetadata getObjectMetadata(String key) throws IOException { * @throws RemoteFileChangedException if an unexpected version is detected */ @Retries.RetryRaw - protected ObjectMetadata getObjectMetadata(String key, + protected HeadObjectResponse getObjectMetadata(String key, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { - ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true, + HeadObjectResponse response = changeInvoker.retryUntranslated("GET " + key, true, () -> { - GetObjectMetadataRequest request - = getRequestFactory().newGetObjectMetadataRequest(key); + HeadObjectRequest.Builder requestBuilder = + getRequestFactory().newHeadObjectRequestBuilder(key); incrementStatistic(OBJECT_METADATA_REQUESTS); DurationTracker duration = getDurationTrackerFactory() .trackDuration(ACTION_HTTP_HEAD_REQUEST.getSymbol()); try { LOG.debug("HEAD {} with change tracker {}", key, changeTracker); if (changeTracker != null) { - changeTracker.maybeApplyConstraint(request); + changeTracker.maybeApplyConstraint(requestBuilder); } - ObjectMetadata objectMetadata = s3.getObjectMetadata(request); + HeadObjectResponse 
headObjectResponse = s3Client.headObject(requestBuilder.build()); if (changeTracker != null) { - changeTracker.processMetadata(objectMetadata, operation); + changeTracker.processMetadata(headObjectResponse, operation); } - return objectMetadata; - } catch(AmazonServiceException ase) { + return headObjectResponse; + } catch (AwsServiceException ase) { if (!isObjectNotFound(ase)) { // file not found is not considered a failure of the call, // so only switch the duration tracker to update failure @@ -2684,7 +2713,7 @@ protected ObjectMetadata getObjectMetadata(String key, } }); incrementReadOperations(); - return meta; + return response; } /** @@ -2713,9 +2742,9 @@ protected S3ListResult listObjects(S3ListRequest request, OBJECT_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1(s3.listObjects(request.getV1())); + return S3ListResult.v1(s3Client.listObjects(request.getV1())); } else { - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + return S3ListResult.v2(s3Client.listObjectsV2(request.getV2())); } })); } @@ -2758,15 +2787,21 @@ protected S3ListResult continueListObjects(S3ListRequest request, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1( - s3.listNextBatchOfObjects( - getRequestFactory() - .newListNextBatchOfObjectsRequest( - prevResult.getV1()))); + List prevListResult = prevResult.getV1().contents(); + + // Next markers are only present when a delimiter is specified. + String nextMarker; + if (prevResult.getV1().nextMarker() != null) { + nextMarker = prevResult.getV1().nextMarker(); + } else { + nextMarker = prevListResult.get(prevListResult.size() - 1).key(); + } + + return S3ListResult.v1(s3Client.listObjects( + request.getV1().toBuilder().marker(nextMarker).build())); } else { - request.getV2().setContinuationToken(prevResult.getV2() - .getNextContinuationToken()); - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + return S3ListResult.v2(s3Client.listObjectsV2(request.getV2().toBuilder() + .continuationToken(prevResult.getV2().nextContinuationToken()).build())); } })); } @@ -2796,14 +2831,14 @@ public void incrementWriteOperations() { * * Retry policy: retry untranslated; delete considered idempotent. * @param key key to blob to delete. - * @throws AmazonClientException problems working with S3 + * @throws SdkException problems working with S3 * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. */ @VisibleForTesting @Retries.RetryRaw protected void deleteObject(String key) - throws AmazonClientException, IOException { + throws SdkException, IOException { blockRootDelete(key); incrementWriteOperations(); try (DurationInfo ignored = @@ -2815,8 +2850,9 @@ protected void deleteObject(String key) incrementStatistic(OBJECT_DELETE_OBJECTS); trackDurationOfInvocation(getDurationTrackerFactory(), OBJECT_DELETE_REQUEST.getSymbol(), - () -> s3.deleteObject(getRequestFactory() - .newDeleteObjectRequest(key))); + () -> s3Client.deleteObject(getRequestFactory() + .newDeleteObjectRequestBuilder(key) + .build())); return null; }); } @@ -2829,14 +2865,14 @@ protected void deleteObject(String key) * @param f path path to delete * @param key key of entry * @param isFile is the path a file (used for instrumentation only) - * @throws AmazonClientException problems working with S3 + * @throws SdkException problems working with S3 * @throws IOException from invoker signature only -should not be raised. 
*/ @Retries.RetryRaw void deleteObjectAtPath(Path f, String key, boolean isFile) - throws AmazonClientException, IOException { + throws SdkException, IOException { if (isFile) { instrumentation.fileDeleted(1); } else { @@ -2878,66 +2914,58 @@ private void blockRootDelete(String key) throws InvalidRequestException { * @return the AWS response * @throws MultiObjectDeleteException one or more of the keys could not * be deleted. - * @throws AmazonClientException amazon-layer failure. + * @throws SdkException amazon-layer failure. */ @Retries.RetryRaw - private DeleteObjectsResult deleteObjects(DeleteObjectsRequest deleteRequest) - throws MultiObjectDeleteException, AmazonClientException, IOException { + private DeleteObjectsResponse deleteObjects(DeleteObjectsRequest deleteRequest) + throws MultiObjectDeleteException, SdkException, IOException { incrementWriteOperations(); BulkDeleteRetryHandler retryHandler = new BulkDeleteRetryHandler(createStoreContext()); - int keyCount = deleteRequest.getKeys().size(); - try(DurationInfo ignored = + int keyCount = deleteRequest.delete().objects().size(); + try (DurationInfo ignored = new DurationInfo(LOG, false, "DELETE %d keys", keyCount)) { - return invoker.retryUntranslated("delete", - DELETE_CONSIDERED_IDEMPOTENT, - (text, e, r, i) -> { - // handle the failure - retryHandler.bulkDeleteRetried(deleteRequest, e); - }, - // duration is tracked in the bulk delete counters - trackDurationOfOperation(getDurationTrackerFactory(), - OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { + DeleteObjectsResponse response = + invoker.retryUntranslated("delete", DELETE_CONSIDERED_IDEMPOTENT, + (text, e, r, i) -> { + // handle the failure + retryHandler.bulkDeleteRetried(deleteRequest, e); + }, + // duration is tracked in the bulk delete counters + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); - return s3.deleteObjects(deleteRequest); - })); - } catch (MultiObjectDeleteException e) { - // one or more of the keys could not be deleted. - // log and rethrow - List errors = e.getErrors(); - LOG.debug("Partial failure of delete, {} errors", errors.size(), e); - for (MultiObjectDeleteException.DeleteError error : errors) { - LOG.debug("{}: \"{}\" - {}", - error.getKey(), error.getCode(), error.getMessage()); + return s3Client.deleteObjects(deleteRequest); + })); + + if (!response.errors().isEmpty()) { + // one or more of the keys could not be deleted. + // log and then throw + List errors = response.errors(); + LOG.debug("Partial failure of delete, {} errors", errors.size()); + for (S3Error error : errors) { + LOG.debug("{}: \"{}\" - {}", error.key(), error.code(), error.message()); + } + throw new MultiObjectDeleteException(errors); } - throw e; + + return response; } } /** - * Create a putObject request. + * Create a putObject request builder. * Adds the ACL and metadata * @param key key of object - * @param metadata metadata header - * @param srcfile source file + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker * @return the request */ - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, File srcfile) { - return requestFactory.newPutObjectRequest(key, metadata, null, srcfile); - } - - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. 
- * - * @param length length of data to set in header. - * @return a new metadata instance - */ - public ObjectMetadata newObjectMetadata(long length) { - return requestFactory.newObjectMetadata(length); + public PutObjectRequest.Builder newPutObjectRequestBuilder(String key, + long length, + boolean isDirectoryMarker) { + return requestFactory.newPutObjectRequestBuilder(key, null, length, isDirectoryMarker); } /** @@ -2954,15 +2982,22 @@ public ObjectMetadata newObjectMetadata(long length) { * Retry policy: N/A: the transfer manager is performing the upload. * Auditing: must be inside an audit span. * @param putObjectRequest the request + * @param file the file to be uploaded + * @param listener the progress listener for the request * @return the upload initiated */ @Retries.OnceRaw - public UploadInfo putObject(PutObjectRequest putObjectRequest) { + public UploadInfo putObject(PutObjectRequest putObjectRequest, File file, + ProgressableProgressListener listener) { long len = getPutRequestLength(putObjectRequest); - LOG.debug("PUT {} bytes to {} via transfer manager ", - len, putObjectRequest.getKey()); + LOG.debug("PUT {} bytes to {} via transfer manager ", len, putObjectRequest.key()); incrementPutStartStatistics(len); - Upload upload = transfers.upload(putObjectRequest); + + // TODO: Something not right with the TM listener, fix + FileUpload upload = transferManager.uploadFile( + UploadFileRequest.builder().putObjectRequest(putObjectRequest).source(file).build()); + // .overrideConfiguration(o -> o.addListener(listener)).build()); + return new UploadInfo(upload, len); } @@ -2977,30 +3012,37 @@ public UploadInfo putObject(PutObjectRequest putObjectRequest) { * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile represents if data to be uploaded is a file * @return the upload initiated - * @throws AmazonClientException on problems + * @throws SdkException on problems */ @VisibleForTesting @Retries.OnceRaw("For PUT; post-PUT actions are RetryExceptionsSwallowed") - PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest, + PutObjectResponse putObjectDirect(PutObjectRequest putObjectRequest, PutObjectOptions putOptions, + S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) - throws AmazonClientException { + throws SdkException { long len = getPutRequestLength(putObjectRequest); - LOG.debug("PUT {} bytes to {}", len, putObjectRequest.getKey()); + LOG.debug("PUT {} bytes to {}", len, putObjectRequest.key()); incrementPutStartStatistics(len); try { - PutObjectResult result = trackDurationOfSupplier( - nonNullDurationTrackerFactory(durationTrackerFactory), - OBJECT_PUT_REQUESTS.getSymbol(), () -> - s3.putObject(putObjectRequest)); + PutObjectResponse response = + trackDurationOfSupplier(nonNullDurationTrackerFactory(durationTrackerFactory), + OBJECT_PUT_REQUESTS.getSymbol(), + () -> isFile ? + s3Client.putObject(putObjectRequest, RequestBody.fromFile(uploadData.getFile())) : + s3Client.putObject(putObjectRequest, + RequestBody.fromInputStream(uploadData.getUploadStream(), + putObjectRequest.contentLength()))); incrementPutCompletedStatistics(true, len); // apply any post-write actions. 
- finishedWrite(putObjectRequest.getKey(), len, - result.getETag(), result.getVersionId(), + finishedWrite(putObjectRequest.key(), len, + response.eTag(), response.versionId(), putOptions); - return result; - } catch (SdkBaseException e) { + return response; + } catch (SdkException e) { incrementPutCompletedStatistics(false, len); throw e; } @@ -3014,11 +3056,16 @@ PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest, */ private long getPutRequestLength(PutObjectRequest putObjectRequest) { long len; - if (putObjectRequest.getFile() != null) { - len = putObjectRequest.getFile().length(); - } else { - len = putObjectRequest.getMetadata().getContentLength(); - } + + // TODO: Check why this exists. Content length is set before. Why can't that be used directly? +// if (putObjectRequest.getFile() != null) { +// len = putObjectRequest.getFile().length(); +// } else { +// len = putObjectRequest.getMetadata().getContentLength(); +// } + + len = putObjectRequest.contentLength(); + Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length"); return len; } @@ -3026,28 +3073,29 @@ private long getPutRequestLength(PutObjectRequest putObjectRequest) { /** * Upload part of a multi-partition file. * Increments the write and put counters. - * Important: this call does not close any input stream in the request. + * Important: this call does not close any input stream in the body. * * Retry Policy: none. - * @param request request * @param durationTrackerFactory duration tracker factory for operation + * @param request the upload part request. + * @param body the request body. * @return the result of the operation. - * @throws AmazonClientException on problems + * @throws AwsServiceException on problems */ @Retries.OnceRaw - UploadPartResult uploadPart(UploadPartRequest request, + UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, final DurationTrackerFactory durationTrackerFactory) - throws AmazonClientException { - long len = request.getPartSize(); + throws AwsServiceException { + long len = request.contentLength(); incrementPutStartStatistics(len); try { - UploadPartResult uploadPartResult = trackDurationOfSupplier( + UploadPartResponse uploadPartResponse = trackDurationOfSupplier( nonNullDurationTrackerFactory(durationTrackerFactory), MULTIPART_UPLOAD_PART_PUT.getSymbol(), () -> - s3.uploadPart(request)); + s3Client.uploadPart(request, body)); incrementPutCompletedStatistics(true, len); - return uploadPartResult; - } catch (AmazonClientException e) { + return uploadPartResponse; + } catch (AwsServiceException e) { incrementPutCompletedStatistics(false, len); throw e; } @@ -3112,56 +3160,57 @@ public void incrementPutProgressStatistics(String key, long bytes) { * be deleted in a multiple object delete operation. * The number of rejected objects will be added to the metric * {@link Statistic#FILES_DELETE_REJECTED}. - * @throws AmazonClientException other amazon-layer failure. + * @throws AwsServiceException other amazon-layer failure. */ @Retries.RetryRaw private void removeKeysS3( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, - IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { if (LOG.isDebugEnabled()) { LOG.debug("Initiating delete operation for {} objects", keysToDelete.size()); - for (DeleteObjectsRequest.KeyVersion key : keysToDelete) { - LOG.debug(" {} {}", key.getKey(), - key.getVersion() != null ? 
key.getVersion() : ""); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + LOG.debug(" {} {}", objectIdentifier.key(), + objectIdentifier.versionId() != null ? objectIdentifier.versionId() : ""); } } if (keysToDelete.isEmpty()) { // exit fast if there are no keys to delete return; } - for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { - blockRootDelete(keyVersion.getKey()); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + blockRootDelete(objectIdentifier.key()); } try { if (enableMultiObjectsDelete) { if (keysToDelete.size() <= pageSize) { deleteObjects(getRequestFactory() - .newBulkDeleteRequest(keysToDelete)); + .newBulkDeleteRequestBuilder(keysToDelete) + .build()); } else { // Multi object deletion of more than 1000 keys is not supported // by s3. So we are paging the keys by page size. LOG.debug("Partitioning the keys to delete as it is more than " + "page size. Number of keys: {}, Page size: {}", keysToDelete.size(), pageSize); - for (List batchOfKeysToDelete : + for (List batchOfKeysToDelete : Lists.partition(keysToDelete, pageSize)) { deleteObjects(getRequestFactory() - .newBulkDeleteRequest(batchOfKeysToDelete)); + .newBulkDeleteRequestBuilder(batchOfKeysToDelete) + .build()); } } } else { - for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { - deleteObject(keyVersion.getKey()); + for (ObjectIdentifier objectIdentifier : keysToDelete) { + deleteObject(objectIdentifier.key()); } } } catch (MultiObjectDeleteException ex) { // partial delete. // Update the stats with the count of the actual number of successful // deletions. - int rejected = ex.getErrors().size(); + int rejected = ex.errors().size(); noteDeleted(keysToDelete.size() - rejected, deleteFakeDir); incrementStatistic(FILES_DELETE_REJECTED, rejected); throw ex; @@ -3194,15 +3243,15 @@ private void noteDeleted(final int count, final boolean deleteFakeDir) { * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. 
*/ @VisibleForTesting @Retries.RetryRaw public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException { try (DurationInfo ignored = new DurationInfo(LOG, false, "Deleting %d keys", keysToDelete.size())) { @@ -3272,7 +3321,7 @@ protected boolean deleteWithoutCloseCheck(Path f, boolean recursive) throws IOEx LOG.debug("Couldn't delete {} - does not exist: {}", path, e.toString()); instrumentation.errorIgnored(); return false; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("delete", path, e); } } @@ -3286,7 +3335,7 @@ protected boolean deleteWithoutCloseCheck(Path f, boolean recursive) throws IOEx */ @Retries.RetryTranslated private void createFakeDirectoryIfNecessary(Path f) - throws IOException, AmazonClientException { + throws IOException, SdkException { String key = pathToKey(f); // we only make the LIST call; the codepaths to get here should not // be reached if there is an empty dir marker -and if they do, it @@ -3306,7 +3355,7 @@ private void createFakeDirectoryIfNecessary(Path f) @Retries.RetryTranslated @VisibleForTesting protected void maybeCreateFakeParentDirectory(Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { Path parent = path.getParent(); if (parent != null && !parent.isRoot() && !isUnderMagicCommitPath(parent)) { createFakeDirectoryIfNecessary(parent); @@ -3360,11 +3409,11 @@ public FileStatus[] listStatus(Path f) throws FileNotFoundException, * @return the statuses of the files/directories in the given patch * @throws FileNotFoundException when the path does not exist; * @throws IOException due to an IO problem. - * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK */ private RemoteIterator innerListStatus(Path f) throws FileNotFoundException, - IOException, AmazonClientException { + IOException, SdkException { Path path = qualify(f); LOG.debug("List status for path: {}", path); @@ -3428,15 +3477,15 @@ public S3ListRequest createListObjectsRequest(String key, private S3ListRequest createListObjectsRequest(String key, String delimiter, int limit) { if (!useListV1) { - ListObjectsV2Request request = - getRequestFactory().newListObjectsV2Request( + ListObjectsV2Request.Builder requestBuilder = + getRequestFactory().newListObjectsV2RequestBuilder( key, delimiter, limit); - return S3ListRequest.v2(request); + return S3ListRequest.v2(requestBuilder.build()); } else { - ListObjectsRequest request = - getRequestFactory().newListObjectsV1Request( + ListObjectsRequest.Builder requestBuilder = + getRequestFactory().newListObjectsV1RequestBuilder( key, delimiter, limit); - return S3ListRequest.v1(request); + return S3ListRequest.v1(requestBuilder.build()); } } @@ -3724,31 +3773,31 @@ S3AFileStatus s3GetFileStatus(final Path path, && probes.contains(StatusProbeEnum.Head)) { try { // look for the simple file - ObjectMetadata meta = getObjectMetadata(key); + HeadObjectResponse meta = getObjectMetadata(key); LOG.debug("Found exact file: normal file {}", key); - long contentLength = meta.getContentLength(); + long contentLength = meta.contentLength(); // check if CSE is enabled, then strip padded length. 
- if (isCSEEnabled - && meta.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null + if (isCSEEnabled && + meta.metadata().get(Headers.CRYPTO_CEK_ALGORITHM) != null && contentLength >= CSE_PADDING_LENGTH) { contentLength -= CSE_PADDING_LENGTH; } return new S3AFileStatus(contentLength, - dateToLong(meta.getLastModified()), + meta.lastModified().toEpochMilli(), path, getDefaultBlockSize(path), username, - meta.getETag(), - meta.getVersionId()); - } catch (AmazonServiceException e) { + meta.eTag(), + meta.versionId()); + } catch (AwsServiceException e) { // if the response is a 404 error, it just means that there is // no file at that path...the remaining checks will be needed. // But: an empty bucket is also a 404, so check for that // and fail. - if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { + if (e.statusCode() != SC_404_NOT_FOUND || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("getFileStatus", path, e); } } @@ -3791,11 +3840,11 @@ S3AFileStatus s3GetFileStatus(final Path path, LOG.debug("Found root directory"); return new S3AFileStatus(Tristate.TRUE, path, username); } - } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { + } catch (AwsServiceException e) { + if (e.statusCode() != SC_404_NOT_FOUND || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("getFileStatus", path, e); } } @@ -3839,7 +3888,7 @@ private boolean s3Exists(final Path path, final Set probes) * @throws IOException IO problem * @throws FileAlreadyExistsException the destination file exists and * overwrite==false - * @throws AmazonClientException failure in the AWS SDK + * @throws SdkException failure in the AWS SDK */ @Override @AuditEntryPoint @@ -3894,13 +3943,12 @@ public void copyLocalFileFromTo(File file, Path from, Path to) throws IOExceptio to, () -> { final String key = pathToKey(to); - final ObjectMetadata om = newObjectMetadata(file.length()); Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); - S3AFileSystem.this.invoker.retry( - "putObject(" + "" + ")", to.toString(), - true, - () -> executePut(putObjectRequest, progress, putOptionsForPath(to))); + PutObjectRequest.Builder putObjectRequestBuilder = + newPutObjectRequestBuilder(key, file.length(), false); + S3AFileSystem.this.invoker.retry("putObject(" + "" + ")", to.toString(), true, + () -> executePut(putObjectRequestBuilder.build(), progress, putOptionsForPath(to), + file)); return null; }); @@ -3925,40 +3973,35 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) /** * Execute a PUT via the transfer manager, blocking for completion. - * If the waiting for completion is interrupted, the upload will be - * aborted before an {@code InterruptedIOException} is thrown. * @param putObjectRequest request * @param progress optional progress callback * @param putOptions put object options * @return the upload result - * @throws InterruptedIOException if the blocking was interrupted. 
+ * @throws IOException IO failure */ @Retries.OnceRaw("For PUT; post-PUT actions are RetrySwallowed") - UploadResult executePut( + PutObjectResponse executePut( final PutObjectRequest putObjectRequest, final Progressable progress, - final PutObjectOptions putOptions) - throws InterruptedIOException { - String key = putObjectRequest.getKey(); + final PutObjectOptions putOptions, + final File file) + throws IOException { + String key = putObjectRequest.key(); long len = getPutRequestLength(putObjectRequest); - UploadInfo info = putObject(putObjectRequest); - Upload upload = info.getUpload(); - ProgressableProgressListener listener = new ProgressableProgressListener( - this, key, upload, progress); - upload.addProgressListener(listener); - UploadResult result = waitForUploadCompletion(key, info); - listener.uploadCompleted(); + ProgressableProgressListener listener = + new ProgressableProgressListener(this, putObjectRequest.key(), progress); + UploadInfo info = putObject(putObjectRequest, file, listener); + PutObjectResponse result = waitForUploadCompletion(key, info).response(); + listener.uploadCompleted(info.getFileUpload()); // post-write actions finishedWrite(key, len, - result.getETag(), result.getVersionId(), putOptions); + result.eTag(), result.versionId(), putOptions); return result; } /** * Wait for an upload to complete. - * If the waiting for completion is interrupted, the upload will be - * aborted before an {@code InterruptedIOException} is thrown. * If the upload (or its result collection) failed, this is where * the failure is raised as an AWS exception. * Calls {@link #incrementPutCompletedStatistics(boolean, long)} @@ -3966,24 +4009,20 @@ UploadResult executePut( * @param key destination key * @param uploadInfo upload to wait for * @return the upload result - * @throws InterruptedIOException if the blocking was interrupted. + * @throws IOException IO failure */ @Retries.OnceRaw - UploadResult waitForUploadCompletion(String key, UploadInfo uploadInfo) - throws InterruptedIOException { - Upload upload = uploadInfo.getUpload(); + CompletedFileUpload waitForUploadCompletion(String key, UploadInfo uploadInfo) + throws IOException { + FileUpload upload = uploadInfo.getFileUpload(); try { - UploadResult result = upload.waitForUploadResult(); + CompletedFileUpload result = upload.completionFuture().join(); incrementPutCompletedStatistics(true, uploadInfo.getLength()); return result; - } catch (InterruptedException e) { + } catch (CompletionException e) { LOG.info("Interrupted: aborting upload"); incrementPutCompletedStatistics(false, uploadInfo.getLength()); - upload.abort(); - throw (InterruptedIOException) - new InterruptedIOException("Interrupted in PUT to " - + keyToQualifiedPath(key)) - .initCause(e); + throw extractException("upload", key, e); } } @@ -4076,17 +4115,13 @@ public void close() throws IOException { * both the expected state of this FS and of failures while being stopped. */ protected synchronized void stopAllServices() { - // shutting down the transfer manager also shuts - // down the S3 client it is bonded to. - if (transfers != null) { - try { - transfers.shutdownNow(true); - } catch (RuntimeException e) { - // catch and swallow for resilience. 
- LOG.debug("When shutting down", e); - } - transfers = null; - } + closeAutocloseables(LOG, transferManager, + s3Client, + s3AsyncClient); + transferManager = null; + s3Client = null; + s3AsyncClient = null; + // At this point the S3A client is shut down, // now the executor pools are closed HadoopExecutors.shutdown(boundedThreadPool, LOG, @@ -4241,20 +4276,20 @@ public List listAWSPolicyRules( * @throws IOException Other IO problems */ @Retries.RetryTranslated - private CopyResult copyFile(String srcKey, String dstKey, long size, + private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) throws IOException, InterruptedIOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); - ProgressListener progressListener = progressEvent -> { - switch (progressEvent.getEventType()) { - case TRANSFER_PART_COMPLETED_EVENT: - incrementWriteOperations(); - break; - default: - break; - } - }; + // TODO: Transfer manager currently only provides transfer listeners for upload, + // add progress listener for copy when this is supported. +// TODO: Is the above still valid? Try to enable when logger issue is resolved. +// TransferListener progressListener = new TransferListener() { +// @Override +// public void transferComplete(Context.TransferComplete context) { +// incrementWriteOperations(); +// } +// }; ChangeTracker changeTracker = new ChangeTracker( keyToQualifiedPath(srcKey).toString(), @@ -4267,7 +4302,7 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, String action = "copyFile(" + srcKey + ", " + dstKey + ")"; Invoker readInvoker = readContext.getReadInvoker(); - ObjectMetadata srcom; + HeadObjectResponse srcom; try { srcom = once(action, srcKey, () -> @@ -4290,33 +4325,36 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, action, srcKey, true, () -> { - CopyObjectRequest copyObjectRequest = - getRequestFactory().newCopyObjectRequest(srcKey, dstKey, srcom); - changeTracker.maybeApplyConstraint(copyObjectRequest); + CopyObjectRequest.Builder copyObjectRequestBuilder = + getRequestFactory().newCopyObjectRequestBuilder(srcKey, dstKey, srcom); + changeTracker.maybeApplyConstraint(copyObjectRequestBuilder); incrementStatistic(OBJECT_COPY_REQUESTS); - Copy copy = transfers.copy(copyObjectRequest, - getAuditManager().createStateChangeListener()); - copy.addProgressListener(progressListener); - CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); - InterruptedException interruptedException = - copyOutcome.getInterruptedException(); - if (interruptedException != null) { - // copy interrupted: convert to an IOException. - throw (IOException)new InterruptedIOException( - "Interrupted copying " + srcKey - + " to " + dstKey + ", cancelling") - .initCause(interruptedException); - } - SdkBaseException awsException = copyOutcome.getAwsException(); - if (awsException != null) { - changeTracker.processException(awsException, "copy"); - throw awsException; + + Copy copy = transferManager.copy( + CopyRequest.builder() + .copyObjectRequest(copyObjectRequestBuilder.build()) +// TODO: Enable when logger issue is resolved. 
+// .overrideConfiguration(c -> c +// .addListener(getAuditManager().createTransferListener()) +// .addListener(progressListener)) + .build()); + + try { + CompletedCopy completedCopy = copy.completionFuture().join(); + CopyObjectResponse result = completedCopy.response(); + changeTracker.processResponse(result); + incrementWriteOperations(); + instrumentation.filesCopied(1, size); + return result; + } catch (CompletionException e) { + Throwable cause = e.getCause(); + if (cause instanceof SdkException) { + SdkException awsException = (SdkException)cause; + changeTracker.processException(awsException, "copy"); + throw awsException; + } + throw extractException(action, srcKey, e); } - CopyResult result = copyOutcome.getCopyResult(); - changeTracker.processResponse(result); - incrementWriteOperations(); - instrumentation.filesCopied(1, size); - return result; }); } @@ -4325,16 +4363,16 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, * Retry policy: none + untranslated. * @param request request to initiate * @return the result of the call - * @throws AmazonClientException on failures inside the AWS SDK + * @throws SdkException on failures inside the AWS SDK * @throws IOException Other IO problems */ @Retries.OnceRaw - InitiateMultipartUploadResult initiateMultipartUpload( - InitiateMultipartUploadRequest request) throws IOException { - LOG.debug("Initiate multipart upload to {}", request.getKey()); + CreateMultipartUploadResponse initiateMultipartUpload( + CreateMultipartUploadRequest request) throws IOException { + LOG.debug("Initiate multipart upload to {}", request.key()); return trackDurationOfSupplier(getDurationTrackerFactory(), OBJECT_MULTIPART_UPLOAD_INITIATED.getSymbol(), - () -> getAmazonS3Client().initiateMultipartUpload(request)); + () -> s3Client.createMultipartUpload(request)); } /** @@ -4407,22 +4445,22 @@ private PutObjectOptions putOptionsForPath(Path path) { */ @Retries.RetryExceptionsSwallowed private void deleteUnnecessaryFakeDirectories(Path path) { - List keysToRemove = new ArrayList<>(); + List keysToRemove = new ArrayList<>(); while (!path.isRoot()) { String key = pathToKey(path); key = (key.endsWith("/")) ? 
key : (key + "/"); LOG.trace("To delete unnecessary fake directory {} for {}", key, path); - keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key)); + keysToRemove.add(ObjectIdentifier.builder().key(key).build()); path = path.getParent(); } try { removeKeys(keysToRemove, true); - } catch(AmazonClientException | IOException e) { + } catch (AwsServiceException | IOException e) { instrumentation.errorIgnored(); if (LOG.isDebugEnabled()) { StringBuilder sb = new StringBuilder(); - for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) { - sb.append(kv.getKey()).append(","); + for (ObjectIdentifier objectIdentifier : keysToRemove) { + sb.append(objectIdentifier.key()).append(","); } LOG.debug("While deleting keys {} ", sb.toString(), e); } @@ -4455,11 +4493,18 @@ private void createFakeDirectory(final String objectName, @Retries.RetryTranslated private void createEmptyObject(final String objectName, PutObjectOptions putOptions) throws IOException { - invoker.retry("PUT 0-byte object ", objectName, - true, () -> - putObjectDirect(getRequestFactory().newDirectoryMarkerRequest(objectName), - putOptions, - getDurationTrackerFactory())); + final InputStream im = new InputStream() { + @Override + public int read() throws IOException { + return -1; + } + }; + + S3ADataBlocks.BlockUploadData uploadData = new S3ADataBlocks.BlockUploadData(im); + + invoker.retry("PUT 0-byte object ", objectName, true, + () -> putObjectDirect(getRequestFactory().newDirectoryMarkerRequest(objectName).build(), + putOptions, uploadData, false, getDurationTrackerFactory())); incrementPutProgressStatistics(objectName, 0); instrumentation.directoryCreated(); } @@ -4716,10 +4761,10 @@ public EtagChecksum getFileChecksum(Path f, final long length) ETAG_CHECKSUM_ENABLED_DEFAULT)) { return trackDurationAndSpan(INVOCATION_GET_FILE_CHECKSUM, path, () -> { LOG.debug("getFileChecksum({})", path); - ObjectMetadata headers = getObjectMetadata(path, null, + HeadObjectResponse headers = getObjectMetadata(path, null, invoker, "getFileChecksum are"); - String eTag = headers.getETag(); + String eTag = headers.eTag(); return eTag != null ? new EtagChecksum(eTag) : null; }); } else { @@ -4801,7 +4846,7 @@ protected final class HeaderProcessingCallbacksImpl implements HeaderProcessing.HeaderProcessingCallbacks { @Override - public ObjectMetadata getObjectMetadata(final String key) + public HeadObjectResponse getObjectMetadata(final String key) throws IOException { return once("getObjectMetadata", key, () -> S3AFileSystem.this.getObjectMetadata(key)); @@ -4918,7 +4963,7 @@ private RemoteIterator innerListFiles( // If we have reached here, it means either there are files // in this directory or it is empty. return listFilesAssumingDir; - } catch (AmazonClientException e) { + } catch (SdkException e) { throw translateException("listFiles", path, e); } } @@ -5016,8 +5061,7 @@ public MultipartUtils.UploadIterator listUploads(@Nullable String prefix) // span is picked up retained in the listing. return trackDurationAndSpan(MULTIPART_UPLOAD_LIST, prefix, null, () -> MultipartUtils.listMultipartUploads( - createStoreContext(), - s3, prefix, maxKeys + createStoreContext(), s3Client, prefix, maxKeys )); } @@ -5028,7 +5072,7 @@ public MultipartUtils.UploadIterator listUploads(@Nullable String prefix) * Retry policy: retry, translated. * @return a listing of multipart uploads. 
* @param prefix prefix to scan for, "" for none - * @throws IOException IO failure, including any uprated AmazonClientException + * @throws IOException IO failure, including any uprated SdkException */ @InterfaceAudience.Private @Retries.RetryTranslated @@ -5040,9 +5084,9 @@ public List listMultipartUploads(String prefix) } String p = prefix; return invoker.retry("listMultipartUploads", p, true, () -> { - ListMultipartUploadsRequest request = getRequestFactory() - .newListMultipartUploadsRequest(p); - return s3.listMultipartUploads(request).getMultipartUploads(); + ListMultipartUploadsRequest.Builder requestBuilder = getRequestFactory() + .newListMultipartUploadsRequestBuilder(p); + return s3Client.listMultipartUploads(requestBuilder.build()).uploads(); }); } @@ -5055,10 +5099,10 @@ public List listMultipartUploads(String prefix) @Retries.OnceRaw void abortMultipartUpload(String destKey, String uploadId) { LOG.info("Aborting multipart upload {} to {}", uploadId, destKey); - getAmazonS3Client().abortMultipartUpload( - getRequestFactory().newAbortMultipartUploadRequest( + s3Client.abortMultipartUpload( + getRequestFactory().newAbortMultipartUploadRequestBuilder( destKey, - uploadId)); + uploadId).build()); } /** @@ -5070,18 +5114,18 @@ void abortMultipartUpload(String destKey, String uploadId) { void abortMultipartUpload(MultipartUpload upload) { String destKey; String uploadId; - destKey = upload.getKey(); - uploadId = upload.getUploadId(); + destKey = upload.key(); + uploadId = upload.uploadId(); if (LOG.isInfoEnabled()) { DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); LOG.debug("Aborting multipart upload {} to {} initiated by {} on {}", - uploadId, destKey, upload.getInitiator(), - df.format(upload.getInitiated())); + uploadId, destKey, upload.initiator(), + df.format(Date.from(upload.initiated()))); } - getAmazonS3Client().abortMultipartUpload( - getRequestFactory().newAbortMultipartUploadRequest( + s3Client.abortMultipartUpload( + getRequestFactory().newAbortMultipartUploadRequestBuilder( destKey, - uploadId)); + uploadId).build()); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 4b50ab2c04bd9..d3fa0a0e84799 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -22,6 +22,7 @@ import java.io.Closeable; import java.io.EOFException; import java.io.IOException; +import java.io.InputStream; import java.io.InterruptedIOException; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; @@ -31,9 +32,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.IntFunction; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +44,6 @@ import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileRange; -import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.impl.CombinedFileRange; import org.apache.hadoop.fs.VectoredReadUtils; @@ -61,6 +58,10 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import 
software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import static java.util.Objects.requireNonNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.VectoredReadUtils.isOrderedDisjoint; @@ -125,14 +126,9 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, */ private volatile boolean closed; /** - * wrappedStream is associated with an object (instance of S3Object). When - * the object is garbage collected, the associated wrappedStream will be - * closed. Keep a reference to this object to prevent the wrapperStream - * still in use from being closed unexpectedly due to garbage collection. - * See HADOOP-17338 for details. + * Input stream returned by a getObject call. */ - private S3Object object; - private S3ObjectInputStream wrappedStream; + private ResponseInputStream wrappedStream; private final S3AReadOpContext context; private final InputStreamCallbacks client; @@ -271,28 +267,22 @@ private synchronized void reopen(String reason, long targetPos, long length, uri, reason, targetPos, contentRangeFinish, length, pos, nextReadPos, inputPolicy); + GetObjectRequest request = client.newGetRequestBuilder(key) + .range(S3AUtils.formatRange(targetPos, contentRangeFinish - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); long opencount = streamStatistics.streamOpened(); - GetObjectRequest request = client.newGetRequest(key) - .withRange(targetPos, contentRangeFinish - 1); String operation = opencount == 0 ? OPERATION_OPEN : OPERATION_REOPEN; String text = String.format("%s %s at %d", operation, uri, targetPos); - changeTracker.maybeApplyConstraint(request); - - object = onceTrackingDuration(text, uri, + wrappedStream = onceTrackingDuration(text, uri, streamStatistics.initiateGetRequest(), () -> client.getObject(request)); - - changeTracker.processResponse(object, operation, + changeTracker.processResponse(wrappedStream.response(), operation, targetPos); - wrappedStream = object.getObjectContent(); - contentRangeStart = targetPos; - if (wrappedStream == null) { - throw new PathIOException(uri, - "Null IO stream from " + operation + " of (" + reason + ") "); - } + contentRangeStart = targetPos; this.pos = targetPos; } @@ -505,14 +495,15 @@ public synchronized int read() throws IOException { */ @Retries.OnceTranslated private void onReadFailure(IOException ioe, boolean forceAbort) { + GetObjectResponse objectResponse = wrappedStream == null ? 
null : wrappedStream.response(); if (LOG.isDebugEnabled()) { LOG.debug("Got exception while trying to read from stream {}, " + "client: {} object: {}, trying to recover: ", - uri, client, object, ioe); + uri, client, objectResponse, ioe); } else { LOG.info("Got exception while trying to read from stream {}, " + "client: {} object: {}, trying to recover: " + ioe, - uri, client, object); + uri, client, objectResponse); } streamStatistics.readException(); closeStream("failure recovery", forceAbort, false); @@ -672,7 +663,6 @@ private CompletableFuture closeStream( CompletableFuture operation; SDKStreamDrainer drainer = new SDKStreamDrainer( uri, - object, wrappedStream, shouldAbort, (int) remaining, @@ -694,7 +684,6 @@ private CompletableFuture closeStream( // either the stream is closed in the blocking call or the async call is // submitted with its own copy of the references wrappedStream = null; - object = null; return operation; } @@ -910,23 +899,19 @@ public void readVectored(List ranges, private void readCombinedRangeAndUpdateChildren(CombinedFileRange combinedFileRange, IntFunction allocate) { LOG.debug("Start reading combined range {} from path {} ", combinedFileRange, pathStr); - // This reference must be kept till all buffers are populated as this is a - // finalizable object which closes the internal stream when gc triggers. - S3Object objectRange = null; - S3ObjectInputStream objectContent = null; + ResponseInputStream rangeContent = null; try { - objectRange = getS3ObjectAndValidateNotNull("readCombinedFileRange", + rangeContent = getS3ObjectInputStream("readCombinedFileRange", combinedFileRange.getOffset(), combinedFileRange.getLength()); - objectContent = objectRange.getObjectContent(); - populateChildBuffers(combinedFileRange, objectContent, allocate); + populateChildBuffers(combinedFileRange, rangeContent, allocate); } catch (Exception ex) { LOG.debug("Exception while reading a range {} from path {} ", combinedFileRange, pathStr, ex); for(FileRange child : combinedFileRange.getUnderlying()) { child.getData().completeExceptionally(ex); } } finally { - IOUtils.cleanupWithLogger(LOG, objectRange, objectContent); + IOUtils.cleanupWithLogger(LOG, rangeContent); } LOG.debug("Finished reading range {} from path {} ", combinedFileRange, pathStr); } @@ -939,7 +924,7 @@ private void readCombinedRangeAndUpdateChildren(CombinedFileRange combinedFileRa * @throws IOException any IOE. */ private void populateChildBuffers(CombinedFileRange combinedFileRange, - S3ObjectInputStream objectContent, + InputStream objectContent, IntFunction allocate) throws IOException { // If the combined file range just contains a single child // range, we only have to fill that one child buffer else @@ -971,7 +956,7 @@ private void populateChildBuffers(CombinedFileRange combinedFileRange, * @param drainQuantity how many bytes to drain. * @throws IOException any IOE. */ - private void drainUnnecessaryData(S3ObjectInputStream objectContent, long drainQuantity) + private void drainUnnecessaryData(InputStream objectContent, long drainQuantity) throws IOException { int drainBytes = 0; int readCount; @@ -1013,28 +998,24 @@ private void validateRangeRequest(FileRange range) throws EOFException { */ private void readSingleRange(FileRange range, ByteBuffer buffer) { LOG.debug("Start reading range {} from path {} ", range, pathStr); - // This reference must be kept till all buffers are populated as this is a - // finalizable object which closes the internal stream when gc triggers. 
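The reopen() hunk above shows the core v2 read pattern: the range and change-tracker constraints are applied while building the GetObjectRequest, and the returned ResponseInputStream doubles as the data stream and the carrier of the response metadata. A standalone sketch, assuming an existing S3Client s3 and illustrative bucket/key values:

    GetObjectRequest request = GetObjectRequest.builder()
        .bucket("example-bucket")
        .key("data.bin")
        .range(S3AUtils.formatRange(0, 1023))   // "bytes=0-1023", via the helper this patch adds
        .build();
    try (ResponseInputStream<GetObjectResponse> stream = s3.getObject(request)) {
        GetObjectResponse response = stream.response();   // eTag(), versionId(), contentLength()...
        // consume the stream as a plain java.io.InputStream;
        // closing (or aborting) it releases the HTTP connection
    }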
- S3Object objectRange = null; - S3ObjectInputStream objectContent = null; + ResponseInputStream objectRange = null; try { long position = range.getOffset(); int length = range.getLength(); - objectRange = getS3ObjectAndValidateNotNull("readSingleRange", position, length); - objectContent = objectRange.getObjectContent(); - populateBuffer(length, buffer, objectContent); + objectRange = getS3ObjectInputStream("readSingleRange", position, length); + populateBuffer(length, buffer, objectRange); range.getData().complete(buffer); } catch (Exception ex) { LOG.warn("Exception while reading a range {} from path {} ", range, pathStr, ex); range.getData().completeExceptionally(ex); } finally { - IOUtils.cleanupWithLogger(LOG, objectRange, objectContent); + IOUtils.cleanupWithLogger(LOG, objectRange); } LOG.debug("Finished reading range {} from path {} ", range, pathStr); } /** - * Get the s3 object for S3 server for a specified range. + * Get the s3 object input stream for S3 server for a specified range. * Also checks if the vectored io operation has been stopped before and after * the http get request such that we don't waste time populating the buffers. * @param operationName name of the operation for which get object on S3 is called. @@ -1043,15 +1024,11 @@ private void readSingleRange(FileRange range, ByteBuffer buffer) { * @return result s3 object. * @throws IOException exception if any. */ - private S3Object getS3ObjectAndValidateNotNull(final String operationName, - final long position, - final int length) throws IOException { + private ResponseInputStream getS3ObjectInputStream( + final String operationName, final long position, final int length) throws IOException { checkIfVectoredIOStopped(); - S3Object objectRange = getS3Object(operationName, position, length); - if (objectRange.getObjectContent() == null) { - throw new PathIOException(uri, - "Null IO stream received during " + operationName); - } + ResponseInputStream objectRange = + getS3Object(operationName, position, length); checkIfVectoredIOStopped(); return objectRange; } @@ -1066,7 +1043,7 @@ private S3Object getS3ObjectAndValidateNotNull(final String operationName, */ private void populateBuffer(int length, ByteBuffer buffer, - S3ObjectInputStream objectContent) throws IOException { + InputStream objectContent) throws IOException { if (buffer.isDirect()) { VectoredReadUtils.readInDirectBuffer(length, buffer, @@ -1091,7 +1068,7 @@ private void populateBuffer(int length, * @param length number of bytes to fill in dest. * @throws IOException any IOE. */ - private void readByteArray(S3ObjectInputStream objectContent, + private void readByteArray(InputStream objectContent, byte[] dest, int offset, int length) throws IOException { @@ -1118,13 +1095,16 @@ private void readByteArray(S3ObjectInputStream objectContent, * @return S3Object result s3 object. * @throws IOException exception if any. 
*/ - private S3Object getS3Object(String operationName, long position, - int length) throws IOException { - final GetObjectRequest request = client.newGetRequest(key) - .withRange(position, position + length - 1); - changeTracker.maybeApplyConstraint(request); + private ResponseInputStream getS3Object(String operationName, + long position, + int length) + throws IOException { + final GetObjectRequest request = client.newGetRequestBuilder(key) + .range(S3AUtils.formatRange(position, position + length - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); DurationTracker tracker = streamStatistics.initiateGetRequest(); - S3Object objectRange; + ResponseInputStream objectRange; Invoker invoker = context.getReadInvoker(); try { objectRange = invoker.retry(operationName, pathStr, true, @@ -1139,7 +1119,7 @@ private S3Object getS3Object(String operationName, long position, } finally { tracker.close(); } - changeTracker.processResponse(objectRange, operationName, + changeTracker.processResponse(objectRange.response(), operationName, position); return objectRange; } @@ -1293,11 +1273,11 @@ public IOStatistics getIOStatistics() { public interface InputStreamCallbacks extends Closeable { /** - * Create a GET request. + * Create a GET request builder. * @param key object key - * @return the request + * @return the request builder */ - GetObjectRequest newGetRequest(String key); + GetObjectRequest.Builder newGetRequestBuilder(String key); /** * Execute the request. @@ -1305,7 +1285,7 @@ public interface InputStreamCallbacks extends Closeable { * @return the response */ @Retries.OnceRaw - S3Object getObject(GetObjectRequest request); + ResponseInputStream getObject(GetObjectRequest request); /** * Submit some asynchronous work, for example, draining a stream. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 528a99f5e0966..d4208b082749b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -30,7 +30,6 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonClientException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +43,8 @@ import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.util.Preconditions; +import software.amazon.awssdk.core.exception.SdkException; + import static org.apache.hadoop.io.retry.RetryPolicies.*; import static org.apache.hadoop.fs.s3a.Constants.*; @@ -68,9 +69,9 @@ * * The retry policy is all built around that of the normal IO exceptions, * particularly those extracted from - * {@link S3AUtils#translateException(String, Path, AmazonClientException)}. + * {@link S3AUtils#translateException(String, Path, SdkException)}. * Because the {@link #shouldRetry(Exception, int, int, boolean)} method - * does this translation if an {@code AmazonClientException} is processed, + * does this translation if an {@code SdkException} is processed, * the policy defined for the IOEs also applies to the original exceptions. 
* * Put differently: this retry policy aims to work for handlers of the @@ -242,11 +243,10 @@ public RetryAction shouldRetry(Exception exception, boolean idempotent) throws Exception { Preconditions.checkArgument(exception != null, "Null exception"); Exception ex = exception; - if (exception instanceof AmazonClientException) { - // uprate the amazon client exception for the purpose of exception + if (exception instanceof SdkException) { + // update the sdk exception for the purpose of exception // processing. - ex = S3AUtils.translateException("", "", - (AmazonClientException) exception); + ex = S3AUtils.translateException("", "", (SdkException) exception); } return retryPolicy.shouldRetry(ex, retries, failovers, idempotent); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index e22433322c9c5..380a707efa262 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -18,22 +18,12 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AbortedException; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; import com.amazonaws.ClientConfiguration; import com.amazonaws.Protocol; -import com.amazonaws.SdkBaseException; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.retry.RetryUtils; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.Preconditions; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -49,16 +39,26 @@ import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; import org.apache.hadoop.fs.s3a.impl.NetworkBinding; -import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.Lists; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.retry.RetryUtils; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; + import javax.annotation.Nullable; import java.io.Closeable; import java.io.EOFException; @@ -78,19 +78,20 @@ import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; 
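The retry-policy javadoc above is the contract to keep in mind: SDK exceptions are translated into IOExceptions before the retry table is consulted, so a single policy covers both raised and translated failures. In sketch form (illustrative operation and path, assuming the patched S3AUtils):

    try {
        // any SDK call; HEAD is just an example
        s3Client.headObject(HeadObjectRequest.builder()
            .bucket("example-bucket").key("key").build());
    } catch (SdkException e) {
        // uprate to an IOE; S3ARetryPolicy then evaluates the IOE,
        // not the raw SdkException
        throw S3AUtils.translateException("HEAD", "s3a://example-bucket/key", e);
    }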
import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; -import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator; @@ -109,6 +110,8 @@ public final class S3AUtils { = "instantiation exception"; static final String NOT_AWS_PROVIDER = "does not implement AWSCredentialsProvider"; + static final String NOT_AWS_V2_PROVIDER = + "does not implement AwsCredentialsProvider"; static final String ABSTRACT_PROVIDER = "is abstract and therefore cannot be created"; static final String ENDPOINT_KEY = "Endpoint"; @@ -159,7 +162,7 @@ private S3AUtils() { /** * Translate an exception raised in an operation into an IOException. * The specific type of IOException depends on the class of - * {@link AmazonClientException} passed in, and any status codes included + * {@link SdkException} passed in, and any status codes included * in the operation. That is: HTTP error codes are examined and can be * used to build a more specific response. * @@ -172,14 +175,14 @@ private S3AUtils() { */ public static IOException translateException(String operation, Path path, - AmazonClientException exception) { + SdkException exception) { return translateException(operation, path.toString(), exception); } /** * Translate an exception raised in an operation into an IOException. * The specific type of IOException depends on the class of - * {@link AmazonClientException} passed in, and any status codes included + * {@link SdkException} passed in, and any status codes included * in the operation. That is: HTTP error codes are examined and can be * used to build a more specific response. * @param operation operation @@ -190,12 +193,12 @@ public static IOException translateException(String operation, @SuppressWarnings("ThrowableInstanceNeverThrown") public static IOException translateException(@Nullable String operation, String path, - SdkBaseException exception) { + SdkException exception) { String message = String.format("%s%s: %s", operation, StringUtils.isNotEmpty(path)? (" on " + path) : "", exception); - if (!(exception instanceof AmazonServiceException)) { + if (!(exception instanceof AwsServiceException)) { Exception innerCause = containsInterruptedException(exception); if (innerCause != null) { // interrupted IO, or a socket exception underneath that class @@ -219,45 +222,49 @@ public static IOException translateException(@Nullable String operation, return new AWSClientIOException(message, exception); } else { IOException ioe; - AmazonServiceException ase = (AmazonServiceException) exception; + AwsServiceException ase = (AwsServiceException) exception; // this exception is non-null if the service exception is an s3 one - AmazonS3Exception s3Exception = ase instanceof AmazonS3Exception - ? (AmazonS3Exception) ase + S3Exception s3Exception = ase instanceof S3Exception + ? 
(S3Exception) ase : null; - int status = ase.getStatusCode(); - message = message + ":" + ase.getErrorCode(); + int status = ase.statusCode(); + if (ase.awsErrorDetails() != null) { + message = message + ":" + ase.awsErrorDetails().errorCode(); + } switch (status) { - case 301: - case 307: + case SC_301_MOVED_PERMANENTLY: + case SC_307_TEMPORARY_REDIRECT: if (s3Exception != null) { - if (s3Exception.getAdditionalDetails() != null && - s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) { - message = String.format("Received permanent redirect response to " - + "endpoint %s. This likely indicates that the S3 endpoint " - + "configured in %s does not match the AWS region containing " - + "the bucket.", - s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT); - } + // TODO: Can we get the endpoint in v2? + // Maybe not: https://github.com/aws/aws-sdk-java-v2/issues/3048 +// if (s3Exception.getAdditionalDetails() != null && +// s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) { +// message = String.format("Received permanent redirect response to " +// + "endpoint %s. This likely indicates that the S3 endpoint " +// + "configured in %s does not match the AWS region containing " +// + "the bucket.", +// s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT); +// } ioe = new AWSRedirectException(message, s3Exception); } else { ioe = new AWSRedirectException(message, ase); } break; - case 400: + case SC_400_BAD_REQUEST: ioe = new AWSBadRequestException(message, ase); break; // permissions - case 401: - case 403: + case SC_401_UNAUTHORIZED: + case SC_403_FORBIDDEN: ioe = new AccessDeniedException(path, null, message); ioe.initCause(ase); break; // the object isn't there - case 404: + case SC_404_NOT_FOUND: if (isUnknownBucket(ase)) { // this is a missing bucket ioe = new UnknownStoreException(path, message, ase); @@ -270,20 +277,20 @@ public static IOException translateException(@Nullable String operation, // this also surfaces sometimes and is considered to // be ~ a not found exception. - case 410: + case SC_410_GONE: ioe = new FileNotFoundException(message); ioe.initCause(ase); break; // method not allowed; seen on S3 Select. // treated as a bad request - case 405: + case SC_405_METHOD_NOT_ALLOWED: ioe = new AWSBadRequestException(message, s3Exception); break; // out of range. This may happen if an object is overwritten with // a shorter one while it is being read. - case 416: + case SC_416_RANGE_NOT_SATISFIABLE: ioe = new EOFException(message); ioe.initCause(ase); break; @@ -291,26 +298,26 @@ public static IOException translateException(@Nullable String operation, // this has surfaced as a "no response from server" message. // so rare we haven't replicated it. // Treating as an idempotent proxy error. 
- case 443: - case 444: + case SC_443_NO_RESPONSE: + case SC_444_NO_RESPONSE: ioe = new AWSNoResponseException(message, ase); break; // throttling - case 503: + case SC_503_SERVICE_UNAVAILABLE: ioe = new AWSServiceThrottledException(message, ase); break; // internal error - case 500: + case SC_500_INTERNAL_SERVER_ERROR: ioe = new AWSStatus500Exception(message, ase); break; - case 200: + case SC_200_OK: if (exception instanceof MultiObjectDeleteException) { // failure during a bulk delete - return translateDeleteException(message, - (MultiObjectDeleteException) exception); + return ((MultiObjectDeleteException) exception) + .translateException(message); } // other 200: FALL THROUGH @@ -336,10 +343,35 @@ public static IOException translateException(@Nullable String operation, public static IOException extractException(String operation, String path, ExecutionException ee) { + return convertExceptionCause(operation, path, ee.getCause()); + } + + /** + * Extract an exception from a failed future, and convert to an IOE. + * @param operation operation which failed + * @param path path operated on (may be null) + * @param ce completion exception + * @return an IOE which can be thrown + */ + public static IOException extractException(String operation, + String path, + CompletionException ce) { + return convertExceptionCause(operation, path, ce.getCause()); + } + + /** + * Convert the cause of a concurrent exception to an IOE. + * @param operation operation which failed + * @param path path operated on (may be null) + * @param cause cause of a concurrent exception + * @return an IOE which can be thrown + */ + private static IOException convertExceptionCause(String operation, + String path, + Throwable cause) { IOException ioe; - Throwable cause = ee.getCause(); - if (cause instanceof AmazonClientException) { - ioe = translateException(operation, path, (AmazonClientException) cause); + if (cause instanceof SdkException) { + ioe = translateException(operation, path, (SdkException) cause); } else if (cause instanceof IOException) { ioe = (IOException) cause; } else { @@ -377,7 +409,7 @@ static Exception containsInterruptedException(Throwable thrown) { * @return an IOE which can be rethrown */ private static InterruptedIOException translateInterruptedException( - SdkBaseException exception, + SdkException exception, final Exception innerCause, String message) { InterruptedIOException ioe; @@ -388,6 +420,7 @@ private static InterruptedIOException translateInterruptedException( if (name.endsWith(".ConnectTimeoutException") || name.endsWith(".ConnectionPoolTimeoutException") || name.endsWith("$ConnectTimeoutException")) { + // TODO: review in v2 // TCP connection http timeout from the shaded or unshaded filenames // com.amazonaws.thirdparty.apache.http.conn.ConnectTimeoutException ioe = new ConnectTimeoutException(message); @@ -411,10 +444,10 @@ private static InterruptedIOException translateInterruptedException( */ public static boolean isThrottleException(Exception ex) { return ex instanceof AWSServiceThrottledException - || (ex instanceof AmazonServiceException - && 503 == ((AmazonServiceException)ex).getStatusCode()) - || (ex instanceof SdkBaseException - && RetryUtils.isThrottlingException((SdkBaseException) ex)); + || (ex instanceof AwsServiceException + && 503 == ((AwsServiceException)ex).statusCode()) + || (ex instanceof SdkException + && RetryUtils.isThrottlingException((SdkException) ex)); } /** @@ -424,7 +457,8 @@ public static boolean isThrottleException(Exception ex) { * @param ex exception * 
@return true if this is believed to be a sign the connection was broken. */ - public static boolean isMessageTranslatableToEOF(SdkBaseException ex) { + public static boolean isMessageTranslatableToEOF(SdkException ex) { + // TODO: review in v2 return ex.toString().contains(EOF_MESSAGE_IN_XML_PARSER) || ex.toString().contains(EOF_READ_DIFFERENT_LENGTH); } @@ -434,47 +468,26 @@ public static boolean isMessageTranslatableToEOF(SdkBaseException ex) { * @param e exception * @return string details */ - public static String stringify(AmazonServiceException e) { + public static String stringify(AwsServiceException e) { StringBuilder builder = new StringBuilder( - String.format("%s: %s error %d: %s; %s%s%n", - e.getErrorType(), - e.getServiceName(), - e.getStatusCode(), - e.getErrorCode(), - e.getErrorMessage(), - (e.isRetryable() ? " (retryable)": "") + String.format("%s error %d: %s; %s%s%n", + e.awsErrorDetails().serviceName(), + e.statusCode(), + e.awsErrorDetails().errorCode(), + e.awsErrorDetails().errorMessage(), + (e.retryable() ? " (retryable)": "") )); - String rawResponseContent = e.getRawResponseContent(); + String rawResponseContent = e.awsErrorDetails().rawResponse().asUtf8String(); if (rawResponseContent != null) { builder.append(rawResponseContent); } return builder.toString(); } - /** - * Get low level details of an amazon exception for logging; multi-line. - * @param e exception - * @return string details - */ - public static String stringify(AmazonS3Exception e) { - // get the low level details of an exception, - StringBuilder builder = new StringBuilder( - stringify((AmazonServiceException) e)); - Map details = e.getAdditionalDetails(); - if (details != null) { - builder.append('\n'); - for (Map.Entry d : details.entrySet()) { - builder.append(d.getKey()).append('=') - .append(d.getValue()).append('\n'); - } - } - return builder.toString(); - } - /** * Create a files status instance from a listing. * @param keyPath path to entry - * @param summary summary from AWS + * @param s3Object s3Object entry * @param blockSize block size to declare. * @param owner owner of the file * @param eTag S3 object eTag or null if unavailable @@ -483,20 +496,20 @@ public static String stringify(AmazonS3Exception e) { * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, - S3ObjectSummary summary, + S3Object s3Object, long blockSize, String owner, String eTag, String versionId, boolean isCSEEnabled) { - long size = summary.getSize(); + long size = s3Object.size(); // check if cse is enabled; strip out constant padding length. if (isCSEEnabled && size >= CSE_PADDING_LENGTH) { size -= CSE_PADDING_LENGTH; } return createFileStatus(keyPath, - objectRepresentsDirectory(summary.getKey()), - size, summary.getLastModified(), blockSize, owner, eTag, versionId); + objectRepresentsDirectory(s3Object.key()), + size, Date.from(s3Object.lastModified()), blockSize, owner, eTag, versionId); } /** @@ -560,7 +573,6 @@ public static long dateToLong(final Date date) { /** * The standard AWS provider list for AWS connections. */ - @SuppressWarnings("deprecation") public static final List> STANDARD_AWS_PROVIDERS = Collections.unmodifiableList( Arrays.asList( @@ -614,6 +626,22 @@ public static List> loadAWSProviderClasses(Configuration conf, } } + /** + * Maps V1 credential providers to either their equivalent SDK V2 class or hadoop provider. 
+ */ + private static Map initCredentialProvidersMap() { + Map v1v2CredentialProviderMap = new HashMap<>(); + + v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider", + EnvironmentVariableCredentialsProvider.class); + v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper", + IAMInstanceCredentialsProvider.class); + v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider", + IAMInstanceCredentialsProvider.class); + + return v1v2CredentialProviderMap; + } + /** * Load list of AWS credential provider/credential provider factory classes; * support a forbidden list to prevent loops, mandate full secrets, etc. @@ -636,6 +664,8 @@ public static AWSCredentialProviderList buildAWSProviderList( List> awsClasses = loadAWSProviderClasses(conf, key, defaultValues.toArray(new Class[defaultValues.size()])); + + Map v1v2CredentialProviderMap = initCredentialProvidersMap(); // and if the list is empty, switch back to the defaults. // this is to address the issue that configuration.getClasses() // doesn't return the default if the config value is just whitespace. @@ -647,19 +677,22 @@ public static AWSCredentialProviderList buildAWSProviderList( AWSCredentialProviderList providers = new AWSCredentialProviderList(); for (Class aClass : awsClasses) { - // List of V1 credential providers that will be migrated with V2 upgrade - if (!Arrays.asList("EnvironmentVariableCredentialsProvider", - "EC2ContainerCredentialsProviderWrapper", "InstanceProfileCredentialsProvider") - .contains(aClass.getSimpleName()) && aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)) { - V2Migration.v1ProviderReferenced(aClass.getName()); - } - if (forbidden.contains(aClass)) { throw new IOException(E_FORBIDDEN_AWS_PROVIDER + " in option " + key + ": " + aClass); } - providers.add(createAWSCredentialProvider(conf, - aClass, binding)); + + if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) && + aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)){ + providers.add(createAWSV2CredentialProvider(conf, + v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding)); + } else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) { + providers.add(createAWSV1CredentialProvider(conf, + aClass, binding)); + } else { + providers.add(createAWSV2CredentialProvider(conf, aClass, binding)); + } + } return providers; } @@ -686,7 +719,7 @@ public static AWSCredentialProviderList buildAWSProviderList( * @return the instantiated class * @throws IOException on any instantiation failure. */ - private static AWSCredentialsProvider createAWSCredentialProvider( + private static AWSCredentialsProvider createAWSV1CredentialProvider( Configuration conf, Class credClass, @Nullable URI uri) throws IOException { @@ -743,9 +776,9 @@ private static AWSCredentialsProvider createAWSCredentialProvider( } if (targetException instanceof IOException) { throw (IOException) targetException; - } else if (targetException instanceof SdkBaseException) { + } else if (targetException instanceof SdkException) { throw translateException("Instantiate " + className, "", - (SdkBaseException) targetException); + (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed throw new IOException(className + " " + INSTANTIATION_EXCEPTION @@ -760,6 +793,105 @@ private static AWSCredentialsProvider createAWSCredentialProvider( } } + /** + * Create an AWS credential provider from its class by using reflection. 
The + * class must implement one of the following means of construction, which are + * attempted in order (see the sketch after this list): + * + * <ol> + * <li>a public constructor accepting java.net.URI and + * org.apache.hadoop.conf.Configuration</li> + * <li>a public constructor accepting + * org.apache.hadoop.conf.Configuration</li> + * <li>a public static method named create that accepts no + * arguments and returns an instance of + * software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or</li> + * <li>a public default constructor.</li> + * </ol>
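A minimal sketch of a provider satisfying the construction contract just listed, modelled on the SimpleAWSCredentialsProvider changes later in this patch; the class name and configuration keys are hypothetical:

    public final class ExampleCredentialsProvider implements AwsCredentialsProvider {
      private final Configuration conf;

      // matched by the getConstructor(credClass, Configuration.class) probe below
      public ExampleCredentialsProvider(Configuration conf) {
        this.conf = conf;
      }

      @Override
      public AwsCredentials resolveCredentials() {
        // hypothetical keys; real S3A providers read the fs.s3a.* secrets
        return AwsBasicCredentials.create(
            conf.getTrimmed("example.access.key"),
            conf.getTrimmed("example.secret.key"));
      }
    }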
+ * + * @param conf configuration + * @param credClass credential class + * @param uri URI of the FS + * @return the instantiated class + * @throws IOException on any instantiation failure. + */ + private static AwsCredentialsProvider createAWSV2CredentialProvider( + Configuration conf, + Class credClass, + @Nullable URI uri) throws IOException { + AwsCredentialsProvider credentials = null; + String className = credClass.getName(); + if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) { + throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER); + } + if (Modifier.isAbstract(credClass.getModifiers())) { + throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); + } + LOG.debug("Credential provider class is {}", className); + + try { + // new X(uri, conf) + Constructor cons = getConstructor(credClass, URI.class, + Configuration.class); + if (cons != null) { + credentials = (AwsCredentialsProvider)cons.newInstance(uri, conf); + return credentials; + } + // new X(conf) + cons = getConstructor(credClass, Configuration.class); + if (cons != null) { + credentials = (AwsCredentialsProvider)cons.newInstance(conf); + return credentials; + } + + // X.getInstance() + Method factory = getFactoryMethod(credClass, AwsCredentialsProvider.class, + "create"); + if (factory != null) { + credentials = (AwsCredentialsProvider)factory.invoke(null); + return credentials; + } + + // new X() + cons = getConstructor(credClass); + if (cons != null) { + credentials = (AwsCredentialsProvider)cons.newInstance(); + return credentials; + } + + // no supported constructor or factory method found + throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION + + ". A class specified in %s must provide a public constructor " + + "of a supported signature, or a public factory method named " + + "create that accepts no arguments.", + className, AWS_CREDENTIALS_PROVIDER)); + } catch (InvocationTargetException e) { + // TODO: Can probably be moved to a common method, but before doing this, check if we still + // want to extend V2 providers the same way v1 providers are. + Throwable targetException = e.getTargetException(); + if (targetException == null) { + targetException = e; + } + if (targetException instanceof IOException) { + throw (IOException) targetException; + } else if (targetException instanceof SdkException) { + throw translateException("Instantiate " + className, "", + (SdkException) targetException); + } else { + // supported constructor or factory method found, but the call failed + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + + ": " + targetException, + targetException); + } + } catch (ReflectiveOperationException | IllegalArgumentException e) { + // supported constructor or factory method found, but the call failed + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + + ": " + e, + e); + } + } + + /** * Set a key if the value is non-empty. * @param config config to patch @@ -946,13 +1078,13 @@ static String lookupPassword(Configuration conf, String key, String defVal) /** * String information about a summary entry for debug messages. 
- * @param summary summary object + * @param s3Object s3Object entry * @return string value */ - public static String stringify(S3ObjectSummary summary) { - StringBuilder builder = new StringBuilder(summary.getKey().length() + 100); - builder.append(summary.getKey()).append(' '); - builder.append("size=").append(summary.getSize()); + public static String stringify(S3Object s3Object) { + StringBuilder builder = new StringBuilder(s3Object.key().length() + 100); + builder.append(s3Object.key()).append(' '); + builder.append("size=").append(s3Object.size()); return builder.toString(); } @@ -1928,4 +2060,15 @@ public String toString() { } }; + /** + * Format a byte range for a request header. + * See https://www.rfc-editor.org/rfc/rfc9110.html#section-14.1.2 + * + * @param rangeStart the start byte offset + * @param rangeEnd the end byte offset (inclusive) + * @return a formatted byte range + */ + public static String formatRange(long rangeStart, long rangeEnd) { + return String.format("bytes=%d-%d", rangeStart, rangeEnd); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 9010f34dc259c..97a9bebdd226d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -25,8 +25,6 @@ import java.util.List; import java.util.Map; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.monitoring.MonitoringListener; import com.amazonaws.services.s3.AmazonS3; @@ -34,6 +32,11 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; + import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; /** @@ -49,7 +52,6 @@ */ @InterfaceAudience.LimitedPrivate("HBoss") @InterfaceStability.Evolving -@Deprecated public interface S3ClientFactory { /** @@ -63,6 +65,34 @@ public interface S3ClientFactory { AmazonS3 createS3Client(URI uri, S3ClientCreationParameters parameters) throws IOException; + /** + * Creates a new {@link S3Client}. + * The client returned supports synchronous operations. For + * asynchronous operations, use + * {@link #createS3AsyncClient(URI, S3ClientCreationParameters)}. + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return S3 client + * @throws IOException on any IO problem + */ + S3Client createS3ClientV2(URI uri, + S3ClientCreationParameters parameters) throws IOException; + + /** + * Creates a new {@link S3AsyncClient}. + * The client returned supports asynchronous operations. For + * synchronous operations, use + * {@link #createS3ClientV2(URI, S3ClientCreationParameters)}. + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return Async S3 client + * @throws IOException on any IO problem + */ + S3AsyncClient createS3AsyncClient(URI uri, + S3ClientCreationParameters parameters) throws IOException; + /** * Settings for the S3 Client. * Implemented as a class to pass in so that adding @@ -74,7 +104,7 @@ final class S3ClientCreationParameters { /** * Credentials. 
*/ - private AWSCredentialsProvider credentialSet; + private AwsCredentialsProvider credentialSet; /** * Endpoint. @@ -109,9 +139,9 @@ final class S3ClientCreationParameters { private boolean requesterPays; /** - * Request handlers; used for auditing, X-Ray etc. - */ - private List requestHandlers; + * Execution interceptors; used for auditing, X-Ray etc. + * */ + private List executionInterceptors; /** * Suffix to UA. @@ -125,22 +155,22 @@ final class S3ClientCreationParameters { private URI pathUri; /** - * List of request handlers to include in the chain - * of request execution in the SDK. - * @return the handler list + * List of execution interceptors to include in the chain + * of interceptors in the SDK. + * @return the interceptors list */ - public List getRequestHandlers() { - return requestHandlers; + public List getExecutionInterceptors() { + return executionInterceptors; } /** - * List of request handlers. - * @param handlers handler list. + * List of execution interceptors. + * @param interceptors interceptors list. * @return this object */ - public S3ClientCreationParameters withRequestHandlers( - @Nullable final List handlers) { - requestHandlers = handlers; + public S3ClientCreationParameters withExecutionInterceptors( + @Nullable final List interceptors) { + executionInterceptors = interceptors; return this; } @@ -191,7 +221,7 @@ public boolean isRequesterPays() { return requesterPays; } - public AWSCredentialsProvider getCredentialSet() { + public AwsCredentialsProvider getCredentialSet() { return credentialSet; } @@ -202,7 +232,7 @@ public AWSCredentialsProvider getCredentialSet() { */ public S3ClientCreationParameters withCredentialSet( - final AWSCredentialsProvider value) { + final AwsCredentialsProvider value) { credentialSet = value; return this; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java index d51211516f251..c729f3de15f08 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java @@ -18,8 +18,8 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; /** * API version-independent container for S3 List requests. 
@@ -78,14 +78,14 @@ public ListObjectsV2Request getV2() { public String toString() { if (isV1()) { return String.format(DESCRIPTION, - v1Request.getBucketName(), v1Request.getPrefix(), - v1Request.getDelimiter(), v1Request.getMaxKeys(), - v1Request.isRequesterPays()); + v1Request.bucket(), v1Request.prefix(), + v1Request.delimiter(), v1Request.maxKeys(), + v1Request.requestPayerAsString()); } else { return String.format(DESCRIPTION, - v2Request.getBucketName(), v2Request.getPrefix(), - v2Request.getDelimiter(), v2Request.getMaxKeys(), - v2Request.isRequesterPays()); + v2Request.bucket(), v2Request.prefix(), + v2Request.delimiter(), v2Request.maxKeys(), + v2Request.requestPayerAsString()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java index 69c42bfe1471a..c77311211abcb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java @@ -22,19 +22,21 @@ import java.util.List; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.S3ObjectSummary; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; + import org.slf4j.Logger; /** * API version-independent container for S3 List responses. */ public class S3ListResult { - private ObjectListing v1Result; - private ListObjectsV2Result v2Result; + private ListObjectsResponse v1Result; + private ListObjectsV2Response v2Result; - protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { + protected S3ListResult(ListObjectsResponse v1, ListObjectsV2Response v2) { v1Result = v1; v2Result = v2; } @@ -44,7 +46,7 @@ protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { * @param result v1 result * @return new list result container */ - public static S3ListResult v1(ObjectListing result) { + public static S3ListResult v1(ListObjectsResponse result) { return new S3ListResult(result, null); } @@ -53,7 +55,7 @@ public static S3ListResult v1(ObjectListing result) { * @param result v2 result * @return new list result container */ - public static S3ListResult v2(ListObjectsV2Result result) { + public static S3ListResult v2(ListObjectsV2Response result) { return new S3ListResult(null, result); } @@ -65,19 +67,19 @@ public boolean isV1() { return v1Result != null; } - public ObjectListing getV1() { + public ListObjectsResponse getV1() { return v1Result; } - public ListObjectsV2Result getV2() { + public ListObjectsV2Response getV2() { return v2Result; } - public List getObjectSummaries() { + public List getS3Objects() { if (isV1()) { - return v1Result.getObjectSummaries(); + return v1Result.contents(); } else { - return v2Result.getObjectSummaries(); + return v2Result.contents(); } } @@ -89,21 +91,21 @@ public boolean isTruncated() { } } - public List getCommonPrefixes() { + public List getCommonPrefixes() { if (isV1()) { - return v1Result.getCommonPrefixes(); + return v1Result.commonPrefixes(); } else { - return v2Result.getCommonPrefixes(); + return v2Result.commonPrefixes(); } } /** - * Get the list of keys in the object summary. 
+ * Get the list of keys in the list result. * @return a possibly empty list */ - private List<String> objectSummaryKeys() { - return getObjectSummaries().stream() - .map(S3ObjectSummary::getKey) + private List<String> objectKeys() { + return getS3Objects().stream() + .map(S3Object::key) .collect(Collectors.toList()); } @@ -112,9 +114,8 @@ private List<String> objectSummaryKeys() { * @return true if the result is non-empty */ public boolean hasPrefixesOrObjects() { - return !(getCommonPrefixes()).isEmpty() - || !getObjectSummaries().isEmpty(); + return !(getCommonPrefixes()).isEmpty() + || !getS3Objects().isEmpty(); } /** @@ -128,7 +129,7 @@ public boolean representsEmptyDirectory( // no children. // So the listing must contain the marker entry only as an object, // and prefixes is null - List<String> keys = objectSummaryKeys(); + List<String> keys = objectKeys(); return keys.size() == 1 && keys.contains(dirKey) && getCommonPrefixes().isEmpty(); } @@ -138,15 +139,15 @@ public boolean representsEmptyDirectory( * @param log log to use */ public void logAtDebug(Logger log) { - Collection<String> prefixes = getCommonPrefixes(); - Collection<S3ObjectSummary> summaries = getObjectSummaries(); + Collection<CommonPrefix> prefixes = getCommonPrefixes(); + Collection<S3Object> s3Objects = getS3Objects(); log.debug("Prefix count = {}; object count={}", - prefixes.size(), summaries.size()); - for (S3ObjectSummary summary : summaries) { - log.debug("Summary: {} {}", summary.getKey(), summary.getSize()); + prefixes.size(), s3Objects.size()); + for (S3Object s3Object : s3Objects) { + log.debug("Summary: {} {}", s3Object.key(), s3Object.size()); } - for (String prefix : prefixes) { - log.debug("Prefix: {}", prefix); + for (CommonPrefix prefix : prefixes) { + log.debug("Prefix: {}", prefix.prefix()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java index 0a0454854b2ac..4fc5b8658b605 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.transfer.model.CopyResult; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Path; @@ -74,31 +72,6 @@ public S3ObjectAttributes( this.len = len; } - /** - * Construct from the result of a copy and those parameters - * which aren't included in an AWS SDK response. - * @param path path - * @param copyResult copy result. - * @param serverSideEncryptionAlgorithm current encryption algorithm - * @param serverSideEncryptionKey any server side encryption key?
- * @param len object length - */ - public S3ObjectAttributes( - final Path path, - final CopyResult copyResult, - final S3AEncryptionMethods serverSideEncryptionAlgorithm, - final String serverSideEncryptionKey, - final long len) { - this.bucket = copyResult.getDestinationBucketName(); - this.key = copyResult.getDestinationKey(); - this.path = path; - this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm; - this.serverSideEncryptionKey = serverSideEncryptionKey; - this.eTag = copyResult.getETag(); - this.versionId = copyResult.getVersionId(); - this.len = len; - } - public String getBucket() { return bucket; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java index 6579a2bc3e7d2..b7de937f5137a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SharedInstanceCredentialProvider.java @@ -39,6 +39,5 @@ */ @InterfaceAudience.Public @InterfaceStability.Evolving -@SuppressWarnings("deprecation") public final class SharedInstanceCredentialProvider extends IAMInstanceCredentialsProvider { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java index 50a2dd5fb3fc2..13008e8d73e41 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java @@ -18,9 +18,10 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + import org.apache.hadoop.classification.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -42,13 +43,10 @@ * property fs.s3a.aws.credentials.provider. Therefore, changing the class name * would be a backward-incompatible change. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. 
*/ @InterfaceAudience.Public @InterfaceStability.Stable -@Deprecated -public class SimpleAWSCredentialsProvider implements AWSCredentialsProvider { +public class SimpleAWSCredentialsProvider implements AwsCredentialsProvider { public static final String NAME = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"; @@ -80,17 +78,14 @@ public SimpleAWSCredentialsProvider(final URI uri, final Configuration conf) } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey)) { - return new BasicAWSCredentials(accessKey, secretKey); + return AwsBasicCredentials.create(accessKey, secretKey); } throw new NoAwsCredentialsException("SimpleAWSCredentialsProvider", "No AWS credentials in the Hadoop configuration"); } - @Override - public void refresh() {} - @Override public String toString() { return getClass().getSimpleName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java index db3d0bb13297c..5587b11a36f88 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java @@ -21,7 +21,7 @@ import javax.annotation.Nullable; import java.io.IOException; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import java.net.URI; @@ -44,12 +44,9 @@ * This credential provider must not fail in creation because that will * break a chain of credential providers. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Stable -@Deprecated public class TemporaryAWSCredentialsProvider extends AbstractSessionCredentialsProvider { public static final String NAME @@ -92,7 +89,7 @@ public TemporaryAWSCredentialsProvider( * @throws NoAwsCredentialsException the credentials are actually empty. */ @Override - protected AWSCredentials createCredentials(Configuration config) + protected AwsCredentials createCredentials(Configuration config) throws IOException { MarshalledCredentials creds = MarshalledCredentialBinding.fromFileSystem( getUri(), config); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java index 238cd97ed8f48..ee3a7c8a7e557 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java @@ -18,22 +18,22 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.transfer.Upload; +import software.amazon.awssdk.transfer.s3.FileUpload; /** * Simple struct that contains information about a S3 upload. 
*/ public class UploadInfo { - private final Upload upload; + private final FileUpload fileUpload; private final long length; - public UploadInfo(Upload upload, long length) { - this.upload = upload; + public UploadInfo(FileUpload upload, long length) { + this.fileUpload = upload; this.length = length; } - public Upload getUpload() { - return upload; + public FileUpload getFileUpload() { + return fileUpload; } public long getLength() { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index 3f42d2caf4a0b..98edcb9d7d4ba 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -19,29 +19,28 @@ package org.apache.hadoop.fs.s3a; import javax.annotation.Nullable; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.util.List; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -50,6 +49,8 @@ import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.s3a.impl.StoreContext; +import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; +import org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; import org.apache.hadoop.fs.s3a.select.SelectBinding; import 
org.apache.hadoop.fs.statistics.DurationTrackerFactory; @@ -240,49 +241,25 @@ private void deactivateAuditSpan() { /** * Create a {@link PutObjectRequest} request against the specific key. * @param destKey destination key - * @param inputStream source data. * @param length size, if known. Use -1 for not known * @param options options for the request + * @param isFile is data to be uploaded a file * @return the request */ @Retries.OnceRaw public PutObjectRequest createPutObjectRequest(String destKey, - InputStream inputStream, long length, - final PutObjectOptions options) { - activateAuditSpan(); - ObjectMetadata objectMetadata = newObjectMetadata(length); - return getRequestFactory().newPutObjectRequest( - destKey, - objectMetadata, - options, - inputStream); - } + final PutObjectOptions options, + boolean isFile) { - /** - * Create a {@link PutObjectRequest} request to upload a file. - * @param dest key to PUT to. - * @param sourceFile source file - * @param options options for the request - * @return the request - */ - @Retries.OnceRaw - public PutObjectRequest createPutObjectRequest( - String dest, - File sourceFile, - final PutObjectOptions options) { activateAuditSpan(); - final ObjectMetadata objectMetadata = - newObjectMetadata((int) sourceFile.length()); - - PutObjectRequest putObjectRequest = getRequestFactory(). - newPutObjectRequest(dest, - objectMetadata, - options, - sourceFile); - return putObjectRequest; + + return getRequestFactory() + .newPutObjectRequestBuilder(destKey, options, length, false) + .build(); } + /** * Callback on a successful write. * @param length length of the write @@ -298,17 +275,6 @@ public void writeFailed(Exception ex) { LOG.debug("Write to {} failed", this, ex); } - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * @param length size, if known. Use -1 for not known - * @return a new metadata instance - */ - public ObjectMetadata newObjectMetadata(long length) { - return getRequestFactory().newObjectMetadata(length); - } - /** * {@inheritDoc} */ @@ -321,11 +287,11 @@ public String initiateMultiPartUpload( try (AuditSpan span = activateAuditSpan()) { return retry("initiate MultiPartUpload", destKey, true, () -> { - final InitiateMultipartUploadRequest initiateMPURequest = - getRequestFactory().newMultipartUploadRequest( + final CreateMultipartUploadRequest.Builder initiateMPURequestBuilder = + getRequestFactory().newMultipartUploadRequestBuilder( destKey, options); - return owner.initiateMultipartUpload(initiateMPURequest) - .getUploadId(); + return owner.initiateMultipartUpload(initiateMPURequestBuilder.build()) + .uploadId(); }); } } @@ -346,10 +312,10 @@ public String initiateMultiPartUpload( * @throws IOException on problems. 
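For context, the v2 multipart-upload lifecycle that initiateMultiPartUpload() and finalizeMultipartUpload() wrap looks roughly like the sketch below. Bucket and key values are illustrative and error handling is elided:

```java
import java.util.Arrays;
import java.util.List;

import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;

public final class MultipartLifecycleSketch {
  static CompleteMultipartUploadResponse run(S3Client s3, String etagOfPart1) {
    // v2: CreateMultipartUploadRequest replaces InitiateMultipartUploadRequest,
    // and accessors lose the "get" prefix (uploadId(), not getUploadId()).
    String uploadId = s3.createMultipartUpload(
        CreateMultipartUploadRequest.builder()
            .bucket("example-bucket").key("dest/key").build())
        .uploadId();
    // ... upload the parts, collecting an etag per part number ...
    List<CompletedPart> parts = Arrays.asList(
        CompletedPart.builder().partNumber(1).eTag(etagOfPart1).build());
    // v2: the parts travel inside a CompletedMultipartUpload, not as PartETags.
    return s3.completeMultipartUpload(b -> b
        .bucket("example-bucket").key("dest/key").uploadId(uploadId)
        .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build()));
  }
}
```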
*/ @Retries.RetryTranslated - private CompleteMultipartUploadResult finalizeMultipartUpload( + private CompleteMultipartUploadResponse finalizeMultipartUpload( String destKey, String uploadId, - List partETags, + List partETags, long length, PutObjectOptions putOptions, Retried retrying) throws IOException { @@ -358,18 +324,18 @@ private CompleteMultipartUploadResult finalizeMultipartUpload( "No upload parts in multipart upload"); } try (AuditSpan span = activateAuditSpan()) { - CompleteMultipartUploadResult uploadResult; + CompleteMultipartUploadResponse uploadResult; uploadResult = invoker.retry("Completing multipart upload", destKey, true, retrying, () -> { - final CompleteMultipartUploadRequest request = - getRequestFactory().newCompleteMultipartUploadRequest( + final CompleteMultipartUploadRequest.Builder requestBuilder = + getRequestFactory().newCompleteMultipartUploadRequestBuilder( destKey, uploadId, partETags); - return writeOperationHelperCallbacks.completeMultipartUpload(request); + return writeOperationHelperCallbacks.completeMultipartUpload(requestBuilder.build()); }); - owner.finishedWrite(destKey, length, uploadResult.getETag(), - uploadResult.getVersionId(), + owner.finishedWrite(destKey, length, uploadResult.eTag(), + uploadResult.versionId(), putOptions); return uploadResult; } @@ -392,10 +358,10 @@ private CompleteMultipartUploadResult finalizeMultipartUpload( * the retry count was exceeded */ @Retries.RetryTranslated - public CompleteMultipartUploadResult completeMPUwithRetries( + public CompleteMultipartUploadResponse completeMPUwithRetries( String destKey, String uploadId, - List partETags, + List partETags, long length, AtomicInteger errorCount, PutObjectOptions putOptions) @@ -453,7 +419,7 @@ public void abortMultipartUpload(String destKey, String uploadId, @Retries.RetryTranslated public void abortMultipartUpload(MultipartUpload upload) throws IOException { - invoker.retry("Aborting multipart commit", upload.getKey(), true, + invoker.retry("Aborting multipart commit", upload.key(), true, withinAuditSpan(getAuditSpan(), () -> owner.abortMultipartUpload(upload))); } @@ -478,7 +444,7 @@ public int abortMultipartUploadsUnderPath(String prefix) abortMultipartUpload(upload); count++; } catch (FileNotFoundException e) { - LOG.debug("Already aborted: {}", upload.getKey(), e); + LOG.debug("Already aborted: {}", upload.key(), e); } } return count; @@ -507,45 +473,31 @@ public void abortMultipartCommit(String destKey, String uploadId) } /** - * Create and initialize a part request of a multipart upload. - * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. + * Create and initialize a part request builder of a multipart upload. * The part number must be less than 10000. * Retry policy is once-translated; to much effort * @param destKey destination key of ongoing operation * @param uploadId ID of ongoing upload * @param partNumber current part number of the upload * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * @return the request builder. * @throws IllegalArgumentException if the parameters are invalid. * @throws PathIOException if the part number is out of range. 
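The part-request builder above is deliberately payload-free: in the v2 SDK the part data no longer rides on the request itself (hence the removed uploadStream/sourceFile/offset parameters) but is passed alongside it as a RequestBody. A hedged caller-side sketch, with illustrative bucket and key values:

```java
import java.io.File;

import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.UploadPartRequest;
import software.amazon.awssdk.services.s3.model.UploadPartResponse;

public final class UploadPartSketch {
  static String uploadBlock(S3Client s3, String uploadId, File block) {
    UploadPartRequest request = UploadPartRequest.builder()
        .bucket("example-bucket")
        .key("dest/key")
        .uploadId(uploadId)
        .partNumber(1)                     // must be in [1, 10000]
        .contentLength(block.length())
        .build();
    // the payload is supplied separately, as the second argument
    UploadPartResponse response = s3.uploadPart(request, RequestBody.fromFile(block));
    return response.eTag();
  }
}
```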
*/ @Override @Retries.OnceTranslated - public UploadPartRequest newUploadPartRequest( + public UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - Long offset) throws IOException { + long size) throws IOException { return once("upload part request", destKey, withinAuditSpan(getAuditSpan(), () -> - getRequestFactory().newUploadPartRequest( + getRequestFactory().newUploadPartRequestBuilder( destKey, uploadId, partNumber, - size, - uploadStream, - sourceFile, - offset))); + size))); } /** @@ -567,18 +519,20 @@ public String toString() { * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile is data to be uploaded a file + * + * @return the upload initiated * @throws IOException on problems */ @Retries.RetryTranslated - public PutObjectResult putObject(PutObjectRequest putObjectRequest, - PutObjectOptions putOptions, + public PutObjectResponse putObject(PutObjectRequest putObjectRequest, + PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) throws IOException { - return retry("Writing Object", - putObjectRequest.getKey(), true, - withinAuditSpan(getAuditSpan(), () -> - owner.putObjectDirect(putObjectRequest, putOptions, durationTrackerFactory))); + return retry("Writing Object", putObjectRequest.key(), true, withinAuditSpan(getAuditSpan(), + () -> owner.putObjectDirect(putObjectRequest, putOptions, uploadData, isFile, + durationTrackerFactory))); } /** @@ -614,10 +568,10 @@ public void revertCommit(String destKey) throws IOException { * the retry count was exceeded */ @Retries.RetryTranslated - public CompleteMultipartUploadResult commitUpload( + public CompleteMultipartUploadResponse commitUpload( String destKey, String uploadId, - List partETags, + List partETags, long length) throws IOException { checkNotNull(uploadId); @@ -636,19 +590,21 @@ public CompleteMultipartUploadResult commitUpload( * Upload part of a multi-partition file. - * @param request request + * @param request the upload part request. + * @param body the request body. * @param durationTrackerFactory duration tracker factory for operation * @return the result of the operation. * @throws IOException on problems */ @Retries.RetryTranslated - public UploadPartResult uploadPart(UploadPartRequest request, + public UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, final DurationTrackerFactory durationTrackerFactory) throws IOException { - return retry("upload part #" + request.getPartNumber() - + " upload ID " + request.getUploadId(), - request.getKey(), + return retry("upload part #" + request.partNumber() + + " upload ID " + request.uploadId(), + request.key(), true, withinAuditSpan(getAuditSpan(), - () -> owner.uploadPart(request, durationTrackerFactory))); + () -> owner.uploadPart(request, body, durationTrackerFactory))); } /** @@ -660,15 +616,9 @@ public Configuration getConf() { return conf; } /** - * Create a S3 Select request for the destination path. + * Create a S3 Select request builder for the destination path. * This does not build the query.
* @param path pre-qualified path for query - * @return the request + * @return the request builder */ - public SelectObjectContentRequest newSelectRequest(Path path) { + public SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path) { try (AuditSpan span = getAuditSpan()) { - return getRequestFactory().newSelectRequest( + return getRequestFactory().newSelectRequestBuilder( storeContext.pathToKey(path)); } } @@ -677,26 +627,27 @@ public SelectObjectContentRequest newSelectRequest(Path path) { * Execute an S3 Select operation. * On a failure, the request is only logged at debug to avoid the * select exception being printed. - * @param source source for selection + * + * @param source source for selection * @param request Select request to issue. - * @param action the action for use in exception creation + * @param action the action for use in exception creation * @return response * @throws IOException failure */ @Retries.RetryTranslated - public SelectObjectContentResult select( + public SelectEventStreamPublisher select( final Path source, final SelectObjectContentRequest request, final String action) throws IOException { // no setting of span here as the select binding is (statically) created // without any span. - String bucketName = request.getBucketName(); + String bucketName = request.bucket(); Preconditions.checkArgument(bucket.equals(bucketName), "wrong bucket: %s", bucketName); if (LOG.isDebugEnabled()) { LOG.debug("Initiating select call {} {}", - source, request.getExpression()); + source, request.expression()); LOG.debug(SelectBinding.toString(request)); } return invoker.retry( @@ -707,8 +658,9 @@ public SelectObjectContentResult select( try (DurationInfo ignored = new DurationInfo(LOG, "S3 Select operation")) { try { - return writeOperationHelperCallbacks.selectObjectContent(request); - } catch (AmazonS3Exception e) { + return SelectObjectContentHelper.select( + writeOperationHelperCallbacks, source, request, action); + } catch (Throwable e) { LOG.error("Failure of S3 Select request against {}", source); LOG.debug("S3 Select request against {}:\n{}", @@ -759,14 +711,14 @@ public interface WriteOperationHelperCallbacks { * @param request selectObjectContent request - * @return selectObjectContentResult + * @param responseHandler handler for the event stream response + * @return a future which completes when the select call finishes */ - SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request); + CompletableFuture selectObjectContent(SelectObjectContentRequest request, SelectObjectContentResponseHandler responseHandler); /** * Initiates a complete multi-part upload request.
* @param request Complete multi-part upload request * @return completeMultipartUploadResult */ - CompleteMultipartUploadResult completeMultipartUpload(CompleteMultipartUploadRequest request); + CompleteMultipartUploadResponse completeMultipartUpload(CompleteMultipartUploadRequest request); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java index 6dd833761ecbe..a3d3a13043c14 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java @@ -20,23 +20,20 @@ import javax.annotation.Nullable; import java.io.Closeable; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -44,6 +41,7 @@ import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.fs.store.audit.AuditSpan; +import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.util.functional.CallableRaisingIOE; @@ -78,27 +76,15 @@ T retry(String action, /** * Create a {@link PutObjectRequest} request against the specific key. * @param destKey destination key - * @param inputStream source data. * @param length size, if known. Use -1 for not known * @param options options for the request + * @param isFile is data to be uploaded a file * @return the request */ PutObjectRequest createPutObjectRequest(String destKey, - InputStream inputStream, long length, - @Nullable PutObjectOptions options); - - /** - * Create a {@link PutObjectRequest} request to upload a file. - * @param dest key to PUT to. - * @param sourceFile source file - * @param options options for the request - * @return the request - */ - PutObjectRequest createPutObjectRequest( - String dest, - File sourceFile, - @Nullable PutObjectOptions options); + @Nullable PutObjectOptions options, + boolean isFile); /** * Callback on a successful write. 
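The callbacks interface above captures the async shift in the v2 SDK: S3 Select now runs against S3AsyncClient with an event-stream handler and returns a future. A hedged sketch of what an implementation of selectObjectContent() might delegate to; variable names are illustrative:

```java
import java.util.concurrent.CompletableFuture;

import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler;

public final class SelectCallbackSketch {
  static CompletableFuture<Void> select(S3AsyncClient s3,
      SelectObjectContentRequest request) {
    SelectObjectContentResponseHandler handler =
        SelectObjectContentResponseHandler.builder()
            // each event-stream element (records, stats, end) lands here
            .subscriber(event -> { /* consume or buffer the event */ })
            .build();
    // completes once the event stream has been fully delivered
    return s3.selectObjectContent(request, handler);
  }
}
```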
@@ -112,15 +98,6 @@ PutObjectRequest createPutObjectRequest( */ void writeFailed(Exception ex); - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * @param length size, if known. Use -1 for not known - * @return a new metadata instance - */ - ObjectMetadata newObjectMetadata(long length); - /** * Start the multipart upload process. * Retry policy: retrying, translated. @@ -149,10 +126,10 @@ PutObjectRequest createPutObjectRequest( * the retry count was exceeded */ @Retries.RetryTranslated - CompleteMultipartUploadResult completeMPUwithRetries( + CompleteMultipartUploadResponse completeMPUwithRetries( String destKey, String uploadId, - List partETags, + List partETags, long length, AtomicInteger errorCount, PutObjectOptions putOptions) @@ -214,31 +191,20 @@ void abortMultipartCommit(String destKey, String uploadId) throws IOException; /** - * Create and initialize a part request of a multipart upload. - * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. + * Create and initialize a part request builder of a multipart upload. * @param destKey destination key of ongoing operation * @param uploadId ID of ongoing upload * @param partNumber current part number of the upload * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * @return the request builder. * @throws IllegalArgumentException if the parameters are invalid * @throws PathIOException if the part number is out of range. */ - UploadPartRequest newUploadPartRequest( + UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - Long offset) throws IOException; + long size) throws IOException; /** * PUT an object directly (i.e. not via the transfer manager). @@ -247,12 +213,14 @@ UploadPartRequest newUploadPartRequest( * @param putObjectRequest the request * @param putOptions put object options * @param durationTrackerFactory factory for duration tracking + * @param uploadData data to be uploaded + * @param isFile is data to be uploaded a file * @return the upload initiated * @throws IOException on problems */ @Retries.RetryTranslated - PutObjectResult putObject(PutObjectRequest putObjectRequest, - PutObjectOptions putOptions, + PutObjectResponse putObject(PutObjectRequest putObjectRequest, + PutObjectOptions putOptions, S3ADataBlocks.BlockUploadData uploadData, boolean isFile, DurationTrackerFactory durationTrackerFactory) throws IOException; @@ -280,22 +248,23 @@ PutObjectResult putObject(PutObjectRequest putObjectRequest, * the retry count was exceeded */ @Retries.RetryTranslated - CompleteMultipartUploadResult commitUpload( + CompleteMultipartUploadResponse commitUpload( String destKey, String uploadId, - List partETags, + List partETags, long length) throws IOException; /** * Upload part of a multi-partition file. - * @param request request + * @param request the upload part request. + * @param body the request body. * @param durationTrackerFactory factory for duration tracking * @return the result of the operation. 
* @throws IOException on problems */ @Retries.RetryTranslated - UploadPartResult uploadPart(UploadPartRequest request, + UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, DurationTrackerFactory durationTrackerFactory) throws IOException; @@ -313,25 +282,26 @@ UploadPartResult uploadPart(UploadPartRequest request, AuditSpan getAuditSpan(); /** - * Create a S3 Select request for the destination path. + * Create a S3 Select request builder for the destination path. * This does not build the query. * @param path pre-qualified path for query - * @return the request + * @return the request builder */ - SelectObjectContentRequest newSelectRequest(Path path); + SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path); /** * Execute an S3 Select operation. * On a failure, the request is only logged at debug to avoid the * select exception being printed. - * @param source source for selection + * + * @param source source for selection * @param request Select request to issue. - * @param action the action for use in exception creation + * @param action the action for use in exception creation * @return response * @throws IOException failure */ @Retries.RetryTranslated - SelectObjectContentResult select( + SelectEventStreamPublisher select( Path source, SelectObjectContentRequest request, String action) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java new file mode 100644 index 0000000000000..db82267044404 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSSessionCredentials; +import com.amazonaws.auth.AnonymousAWSCredentials; + +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; + +/** + * Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface. + * Implements both interfaces so it can be used with either the V1 or V2 AWS SDK.
+ */ +final class V1ToV2AwsCredentialProviderAdapter implements V1V2AwsCredentialProviderAdapter { + + private final AWSCredentialsProvider v1CredentialsProvider; + + private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsProvider) { + this.v1CredentialsProvider = v1CredentialsProvider; + } + + @Override + public AwsCredentials resolveCredentials() { + AWSCredentials toAdapt = v1CredentialsProvider.getCredentials(); + if (toAdapt instanceof AWSSessionCredentials) { + return AwsSessionCredentials.create(toAdapt.getAWSAccessKeyId(), + toAdapt.getAWSSecretKey(), + ((AWSSessionCredentials) toAdapt).getSessionToken()); + } else if (toAdapt instanceof AnonymousAWSCredentials) { + return AnonymousCredentialsProvider.create().resolveCredentials(); + } else { + return AwsBasicCredentials.create(toAdapt.getAWSAccessKeyId(), toAdapt.getAWSSecretKey()); + } + } + + @Override + public AWSCredentials getCredentials() { + return v1CredentialsProvider.getCredentials(); + } + + @Override + public void refresh() { + v1CredentialsProvider.refresh(); + } + + /** + * @param v1CredentialsProvider V1 credential provider to adapt. + * @return A new instance of the credentials provider adapter. + */ + static V1ToV2AwsCredentialProviderAdapter create(AWSCredentialsProvider v1CredentialsProvider) { + return new V1ToV2AwsCredentialProviderAdapter(v1CredentialsProvider); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java new file mode 100644 index 0000000000000..f27166a9ef91d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + +public interface V1V2AwsCredentialProviderAdapter extends AWSCredentialsProvider, + AwsCredentialsProvider { + + /** + * Creates a two-way adapter from a V1 {@link AWSCredentialsProvider} interface. + * + * @param v1CredentialsProvider V1 credentials provider. + * @return Two-way credential provider adapter. 
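A hedged usage sketch for the adapter above: wrap any v1 provider so that a v2 client builder accepts it. EnvironmentVariableCredentialsProvider is a real v1 SDK class, chosen here purely for illustration:

```java
import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter;

import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.services.s3.S3Client;

public final class AdapterUsageSketch {
  static S3Client buildClient() {
    // the adapter implements both SDK interfaces, so the same object
    // could equally be handed to a legacy v1 code path
    AwsCredentialsProvider adapted = V1V2AwsCredentialProviderAdapter.adapt(
        new com.amazonaws.auth.EnvironmentVariableCredentialsProvider());
    return S3Client.builder()
        .credentialsProvider(adapted)
        .build();
  }
}
```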
+ */ + static V1V2AwsCredentialProviderAdapter adapt(AWSCredentialsProvider v1CredentialsProvider) { + return V1ToV2AwsCredentialProviderAdapter.create(v1CredentialsProvider); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java new file mode 100644 index 0000000000000..8d03c915e171a --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Adapter classes for allowing V1 credential providers to be used with SDKV2. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.s3a.adapter; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 2a4771925f086..4806543815cdb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -19,39 +19,33 @@ package org.apache.hadoop.fs.s3a.api; import javax.annotation.Nullable; -import java.io.File; -import java.io.InputStream; import java.util.List; -import java.util.Optional; - -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import 
com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ObjectCannedACL; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.StorageClass; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; + /** * Factory for S3 objects. * @@ -79,22 +73,7 @@ public interface RequestFactory { * Get the canned ACL of this FS. * @return an ACL, if any */ - CannedAccessControlList getCannedACL(); - - /** - * Create the AWS SDK structure used to configure SSE, - * if the encryption secrets contain the information/settings for this. - * @return an optional set of KMS Key settings - */ - Optional generateSSEAwsKeyParams(); - - /** - * Create the SSE-C structure for the AWS SDK, if the encryption secrets - * contain the information/settings for this. - * This will contain a secret extracted from the bucket/configuration. - * @return an optional customer key. - */ - Optional generateSSECustomerKey(); + ObjectCannedACL getCannedACL(); /** * Get the encryption algorithm of this endpoint. @@ -115,79 +94,58 @@ public interface RequestFactory { StorageClass getStorageClass(); /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @return a new metadata instance - */ - ObjectMetadata newObjectMetadata(long length); - - /** - * Create a copy request. + * Create a copy request builder. * This includes the work of copying the relevant parts * of the metadata from the source * @param srcKey source * @param dstKey destination * @param srcom source object metadata. - * @return the request + * @return the request builder */ - CopyObjectRequest newCopyObjectRequest(String srcKey, + CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, String dstKey, - ObjectMetadata srcom); + HeadObjectResponse srcom); - /** - * Create a putObject request. 
- * Adds the ACL and metadata - * @param key key of object - * @param metadata metadata header - * @param options options for the request - * @param srcfile source file - * @return the request - */ - PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, PutObjectOptions options, File srcfile); /** - * Create a {@link PutObjectRequest} request. + * Create a {@link PutObjectRequest} request builder. * The metadata is assumed to have been configured with the size of the * operation. * @param key key of object - * @param metadata metadata header * @param options options for the request - * @param inputStream source data. - * @return the request + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker + * @return the request builder */ - PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, + PutObjectRequest.Builder newPutObjectRequestBuilder(String key, PutObjectOptions options, - InputStream inputStream); + long length, + boolean isDirectoryMarker); /** * Create a {@link PutObjectRequest} request for creating * an empty directory. * * @param directory destination directory. - * @return request for a zero byte upload. + * @return request builder for a zero byte upload. */ - PutObjectRequest newDirectoryMarkerRequest(String directory); + PutObjectRequest.Builder newDirectoryMarkerRequest(String directory); /** * List all multipart uploads under a prefix. * @param prefix prefix to list under - * @return the request. + * @return the request builder. */ - ListMultipartUploadsRequest newListMultipartUploadsRequest( + ListMultipartUploadsRequest.Builder newListMultipartUploadsRequestBuilder( @Nullable String prefix); /** * Abort a multipart upload. * @param destKey destination object key * @param uploadId ID of initiated upload - * @return the request. + * @return the request builder. */ - AbortMultipartUploadRequest newAbortMultipartUploadRequest( + AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder( String destKey, String uploadId); @@ -195,10 +153,10 @@ AbortMultipartUploadRequest newAbortMultipartUploadRequest( * Start a multipart upload. * @param destKey destination object key * @param options options for the request - * @return the request. + * @return the request builder. * @throws PathIOException if multipart uploads are disabled */ - InitiateMultipartUploadRequest newMultipartUploadRequest( + CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder( String destKey, @Nullable PutObjectOptions options) throws PathIOException; @@ -207,107 +165,88 @@ InitiateMultipartUploadRequest newMultipartUploadRequest( * @param destKey destination object key * @param uploadId ID of initiated upload * @param partETags ordered list of etags - * @return the request. + * @return the request builder. */ - CompleteMultipartUploadRequest newCompleteMultipartUploadRequest( + CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder( String destKey, String uploadId, - List partETags); + List partETags); /** - * Create a HEAD request. + * Create a HEAD request builder. * @param key key, may have trailing / - * @return the request. + * @return the request builder. */ - GetObjectMetadataRequest newGetObjectMetadataRequest(String key); + HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key); + /** - * Create a GET request. + * Create a GET request builder. * @param key object key - * @return the request. 
+ * @return the request builder. */ - GetObjectRequest newGetObjectRequest(String key); + GetObjectRequest.Builder newGetObjectRequestBuilder(String key); /** - * Create and initialize a part request of a multipart upload. - * Exactly one of: {@code uploadStream} or {@code sourceFile} - * must be specified. - * A subset of the file may be posted, by providing the starting point - * in {@code offset} and a length of block in {@code size} equal to - * or less than the remaining bytes. - * @param destKey destination key of ongoing operation - * @param uploadId ID of ongoing upload - * @param partNumber current part number of the upload - * @param size amount of data - * @param uploadStream source of data to upload - * @param sourceFile optional source file. - * @param offset offset in file to start reading. - * @return the request. + * Create and initialize a part request builder of a multipart upload. + * + * @param destKey destination key of ongoing operation + * @param uploadId ID of ongoing upload + * @param partNumber current part number of the upload + * @param size amount of data + * @return the request builder. * @throws PathIOException if the part number is out of range. */ - UploadPartRequest newUploadPartRequest( + UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - long offset) throws PathIOException; + long size) throws PathIOException; /** - * Create a S3 Select request for the destination object. + * Create a S3 Select request builder for the destination object. * This does not build the query. * @param key object key - * @return the request + * @return the request builder */ - SelectObjectContentRequest newSelectRequest(String key); + SelectObjectContentRequest.Builder newSelectRequestBuilder(String key); /** - * Create the (legacy) V1 list request. + * Create the (legacy) V1 list request builder. * @param key key to list under * @param delimiter delimiter for keys * @param maxKeys maximum number in a list page. - * @return the request + * @return the request builder. */ - ListObjectsRequest newListObjectsV1Request(String key, + ListObjectsRequest.Builder newListObjectsV1RequestBuilder(String key, String delimiter, int maxKeys); /** - * Create the next V1 page list request, following - * on from the previous response. - * @param prev previous response - * @return the request - */ - - ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest( - ObjectListing prev); - - /** - * Create a V2 list request. + * Create a V2 list request builder. * This will be recycled for any subsequent requests. * @param key key to list under * @param delimiter delimiter for keys * @param maxKeys maximum number in a list page. - * @return the request + * @return the request builder. */ - ListObjectsV2Request newListObjectsV2Request(String key, + ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key, String delimiter, int maxKeys); /** - * Create a request to delete a single object. + * Create a request builder to delete a single object. * @param key object to delete - * @return the request + * @return the request builder. */ - DeleteObjectRequest newDeleteObjectRequest(String key); + DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key); /** - * Bulk delete request. + * Create a request builder to delete objects in bulk. * @param keysToDelete list of keys to delete. - * @return the request + * @return the request builder. 
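The factory's shift from finished requests to builders reflects that v2 request objects are immutable: the factory applies store-wide settings such as ACLs and encryption, and the call site layers on call-specific details before freezing the request. A hedged sketch of the intended call pattern; the requestFactory variable and the range value are illustrative:

```java
import org.apache.hadoop.fs.s3a.api.RequestFactory;

import software.amazon.awssdk.services.s3.model.GetObjectRequest;

public final class BuilderPatternSketch {
  static GetObjectRequest rangedGet(RequestFactory requestFactory) {
    // the factory sets bucket, ACL and encryption options; the caller
    // adds per-call details and then freezes the request with build()
    return requestFactory.newGetObjectRequestBuilder("dir/file.bin")
        .range("bytes=0-1023")
        .build();
  }
}
```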
*/ - DeleteObjectsRequest newBulkDeleteRequest( - List keysToDelete); + DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder( + List keysToDelete); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java index 8134d5cea942e..712246c21961c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSAuditEventCallbacks.java @@ -18,15 +18,9 @@ package org.apache.hadoop.fs.s3a.audit; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.SdkBaseException; -import com.amazonaws.handlers.HandlerAfterAttemptContext; -import com.amazonaws.handlers.HandlerBeforeAttemptContext; -import com.amazonaws.http.HttpResponse; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import org.apache.hadoop.fs.s3a.Retries; /** * Callbacks for audit spans. This is implemented @@ -37,10 +31,10 @@ * detect this and raise an exception. * * Look at the documentation for - * {@code com.amazonaws.handlers.IRequestHandler2} for details + * {@code ExecutionInterceptor} for details * on the callbacks. */ -public interface AWSAuditEventCallbacks { +public interface AWSAuditEventCallbacks extends ExecutionInterceptor { /** * Return a span ID which must be unique for all spans within @@ -66,95 +60,8 @@ public interface AWSAuditEventCallbacks { * It is not invoked on any AWS requests created in the SDK. * Avoid raising exceptions or talking to any remote service; * this callback is for annotation rather than validation. - * @param request request request. - * @param type of request - * @return the request, possibly modified. + * @param builder the request builder. */ - default T requestCreated(T request) { - return request; - } + default void requestCreated(SdkRequest.Builder builder) {} - /** - * Preflight preparation of AWS request. - * @param request request - * @param type of request - * @return an updated request. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - @Retries.OnceRaw - default T beforeExecution(T request) - throws AuditFailureException, SdkBaseException { - return request; - } - - /** - * Callback after S3 responded to a request. - * @param request request - * @param response response. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - default void afterResponse(Request request, - Response response) - throws AuditFailureException, SdkBaseException { - } - - /** - * Callback after a request resulted in an error. - * @param request request - * @param response response. - * @param exception exception raised. - * @throws AuditFailureException for generic audit failures - * @throws SdkBaseException for other reasons. - */ - default void afterError(Request request, - Response response, - Exception exception) - throws AuditFailureException, SdkBaseException { - } - - /** - * Request before marshalling. - * @param request request - * @return possibly modified request. - */ - default AmazonWebServiceRequest beforeMarshalling( - AmazonWebServiceRequest request) { - return request; - } - - /** - * Request before marshalling. 
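All of the removed v1 callbacks above collapse into v2's single ExecutionInterceptor interface, which is what AWSAuditEventCallbacks now extends. A minimal hedged sketch of an interceptor; the logging bodies are illustrative:

```java
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;

public class LoggingInterceptorSketch implements ExecutionInterceptor {
  @Override
  public void beforeExecution(Context.BeforeExecution context,
      ExecutionAttributes executionAttributes) {
    // roughly the v1 beforeExecution(request) hook
    System.out.println("starting " + context.request().getClass().getSimpleName());
  }

  @Override
  public void afterExecution(Context.AfterExecution context,
      ExecutionAttributes executionAttributes) {
    // roughly the v1 afterResponse(request, response) hook
    System.out.println("completed " + context.response().getClass().getSimpleName());
  }
}
```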
- * @param request request - */ - default void beforeRequest(Request request) { - } - - /** - * Before any attempt is made. - * @param context full context, including the request. - */ - default void beforeAttempt(HandlerBeforeAttemptContext context) { - } - - /** - * After any attempt is made. - * @param context full context, including the request. - */ - default void afterAttempt( - HandlerAfterAttemptContext context) { - } - - /** - * Before unmarshalling the response. - * @param request request made. - * @param httpResponse response received - * @return updated response. - */ - default HttpResponse beforeUnmarshalling( - final Request request, - final HttpResponse httpResponse) { - return httpResponse; - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java index b4be341c912e0..8a24a4e14db1c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java @@ -20,24 +20,23 @@ import java.util.List; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetBucketLocationRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; import 
static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_GET_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_HTTP_HEAD_REQUEST; @@ -64,102 +63,85 @@ public class AWSRequestAnalyzer { * read/write and path. * @param request request. * @return information about the request. - * @param type of request. */ - public RequestInfo analyze(T request) { + public RequestInfo analyze(SdkRequest request) { // this is where Scala's case statement would massively // simplify life. // Please Keep in Alphabetical Order. if (request instanceof AbortMultipartUploadRequest) { return writing(MULTIPART_UPLOAD_ABORTED, - ((AbortMultipartUploadRequest) request).getKey(), + ((AbortMultipartUploadRequest) request).key(), 0); } else if (request instanceof CompleteMultipartUploadRequest) { CompleteMultipartUploadRequest r = (CompleteMultipartUploadRequest) request; return writing(MULTIPART_UPLOAD_COMPLETED, - r.getKey(), - r.getPartETags().size()); + r.key(), + r.multipartUpload().parts().size()); + } else if (request instanceof CreateMultipartUploadRequest) { + return writing(MULTIPART_UPLOAD_STARTED, + ((CreateMultipartUploadRequest) request).key(), + 0); } else if (request instanceof DeleteObjectRequest) { // DeleteObject: single object return writing(OBJECT_DELETE_REQUEST, - ((DeleteObjectRequest) request).getKey(), + ((DeleteObjectRequest) request).key(), 1); } else if (request instanceof DeleteObjectsRequest) { // DeleteObjects: bulk delete // use first key as the path DeleteObjectsRequest r = (DeleteObjectsRequest) request; - List keys - = r.getKeys(); + List objectIdentifiers + = r.delete().objects(); return writing(OBJECT_BULK_DELETE_REQUEST, - keys.isEmpty() ? null : keys.get(0).getKey(), - keys.size()); + objectIdentifiers.isEmpty() ? null : objectIdentifiers.get(0).key(), + objectIdentifiers.size()); } else if (request instanceof GetBucketLocationRequest) { GetBucketLocationRequest r = (GetBucketLocationRequest) request; return reading(STORE_EXISTS_PROBE, - r.getBucketName(), + r.bucket(), 0); - } else if (request instanceof GetObjectMetadataRequest) { - return reading(ACTION_HTTP_HEAD_REQUEST, - ((GetObjectMetadataRequest) request).getKey(), 0); } else if (request instanceof GetObjectRequest) { GetObjectRequest r = (GetObjectRequest) request; - long[] range = r.getRange(); - long size = range == null - ? 
-1 - : range[1] - range[0]; return reading(ACTION_HTTP_GET_REQUEST, - r.getKey(), - size); - } else if (request instanceof InitiateMultipartUploadRequest) { - return writing(MULTIPART_UPLOAD_STARTED, - ((InitiateMultipartUploadRequest) request).getKey(), - 0); + r.key(), + sizeFromRangeHeader(r.range())); + } else if (request instanceof HeadObjectRequest) { + return reading(ACTION_HTTP_HEAD_REQUEST, + ((HeadObjectRequest) request).key(), 0); } else if (request instanceof ListMultipartUploadsRequest) { ListMultipartUploadsRequest r = (ListMultipartUploadsRequest) request; return reading(MULTIPART_UPLOAD_LIST, - r.getPrefix(), - r.getMaxUploads()); + r.prefix(), + r.maxUploads()); } else if (request instanceof ListObjectsRequest) { ListObjectsRequest r = (ListObjectsRequest) request; return reading(OBJECT_LIST_REQUEST, - r.getPrefix(), - r.getMaxKeys()); - } else if (request instanceof ListNextBatchOfObjectsRequest) { - ListNextBatchOfObjectsRequest r = (ListNextBatchOfObjectsRequest) request; - ObjectListing l = r.getPreviousObjectListing(); - String prefix = ""; - int size = 0; - if (l != null) { - prefix = l.getPrefix(); - size = l.getMaxKeys(); - } - return reading(OBJECT_LIST_REQUEST, - prefix, - size); + r.prefix(), + r.maxKeys()); } else if (request instanceof ListObjectsV2Request) { ListObjectsV2Request r = (ListObjectsV2Request) request; return reading(OBJECT_LIST_REQUEST, - r.getPrefix(), - r.getMaxKeys()); + r.prefix(), + r.maxKeys()); } else if (request instanceof PutObjectRequest) { PutObjectRequest r = (PutObjectRequest) request; return writing(OBJECT_PUT_REQUEST, - r.getKey(), + r.key(), 0); } else if (request instanceof SelectObjectContentRequest) { SelectObjectContentRequest r = (SelectObjectContentRequest) request; return reading(OBJECT_SELECT_REQUESTS, - r.getKey(), + r.key(), 1); } else if (request instanceof UploadPartRequest) { UploadPartRequest r = (UploadPartRequest) request; return writing(MULTIPART_UPLOAD_PART_PUT, - r.getKey(), - r.getPartSize()); + r.key(), + r.contentLength()); } // no explicit support, return classname return writing(request.getClass().getName(), null, 0); @@ -212,7 +194,7 @@ private RequestInfo writing(final String verb, */ public static boolean isRequestNotAlwaysInSpan(final Object request) { - return request instanceof CopyPartRequest + return request instanceof UploadPartCopyRequest || request instanceof CompleteMultipartUploadRequest || request instanceof GetBucketLocationRequest; } @@ -225,9 +207,9 @@ private RequestInfo writing(final String verb, * @return true if the transfer manager creates them. */ public static boolean isRequestMultipartIO(final Object request) { - return request instanceof CopyPartRequest + return request instanceof UploadPartCopyRequest || request instanceof CompleteMultipartUploadRequest - || request instanceof InitiateMultipartUploadRequest + || request instanceof CreateMultipartUploadRequest || request instanceof UploadPartRequest; } @@ -307,4 +289,23 @@ public String toString() { private static long toSafeLong(final Number size) { return size != null ? 
size.longValue() : 0; } + + private static final String BYTES_PREFIX = "bytes="; + + private static Number sizeFromRangeHeader(String rangeHeader) { + if (rangeHeader != null && rangeHeader.startsWith(BYTES_PREFIX)) { + String[] values = rangeHeader + .substring(BYTES_PREFIX.length()) + .split("-"); + if (values.length == 2) { + try { + long start = Long.parseUnsignedLong(values[0]); + long end = Long.parseUnsignedLong(values[1]); + return end - start; + } catch (NumberFormatException e) { + // malformed range header: fall through and return -1 + } + } + } + return -1; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java index c66f45eb309ff..4389c49d866bb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java @@ -23,7 +23,6 @@ import java.lang.reflect.InvocationTargetException; import java.nio.file.AccessDeniedException; -import com.amazonaws.HandlerContextAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,10 +33,12 @@ import org.apache.hadoop.fs.s3a.audit.impl.NoopAuditManagerS3A; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED_DEFAULT; -import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_HANDLER_CONTEXT; +import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE; /** * Support for integrating auditing within the S3A code. @@ -123,25 +124,24 @@ public static OperationAuditor createAndInitAuditor( } /** - * Get the span from a handler context. - * @param request request - * @param type of request. + * Get the span from the execution attributes. + * @param executionAttributes the execution attributes * @return the span callbacks or null */ - public static AWSAuditEventCallbacks - retrieveAttachedSpan(final T request) { - return request.getHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT); + public static AuditSpanS3A + retrieveAttachedSpan(final ExecutionAttributes executionAttributes) { + return executionAttributes.getAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE); } /** - * Attach a span to a handler context. - * @param request request + * Attach a span to the execution attributes. + * @param executionAttributes the execution attributes * @param span span to attach - * @param type of request.
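ExecutionAttributes, used above in place of v1 handler contexts, is a typed key-value bag that travels with each SDK execution. A hedged round-trip sketch; the attribute name is illustrative, mirroring the role of AUDIT_SPAN_EXECUTION_ATTRIBUTE:

```java
import software.amazon.awssdk.core.interceptor.ExecutionAttribute;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;

public final class AttributeRoundTripSketch {
  // typed key: the generic parameter fixes the value type at compile time
  static final ExecutionAttribute<String> SPAN_ID =
      new ExecutionAttribute<>("org.example.span.id");

  static String roundTrip() {
    ExecutionAttributes attributes = new ExecutionAttributes();
    attributes.putAttribute(SPAN_ID, "span-0001");
    return attributes.getAttribute(SPAN_ID);
  }
}
```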
*/ - public static void attachSpanToRequest( - final T request, final AWSAuditEventCallbacks span) { - request.addHandlerContext(AUDIT_SPAN_HANDLER_CONTEXT, span); + public static void attachSpanToRequest( + final ExecutionAttributes executionAttributes, + final AuditSpanS3A span) { + executionAttributes.putAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE, span); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java index c1302d57454b1..e83216b3a75bc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java @@ -21,9 +21,6 @@ import java.io.IOException; import java.util.List; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; @@ -32,6 +29,9 @@ import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.service.Service; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + /** * Interface for Audit Managers auditing operations through the * AWS libraries. @@ -56,24 +56,24 @@ public interface AuditManagerS3A extends Service, OperationAuditor getAuditor(); /** - * Create the request handler(s) for this audit service. - * The list returned is mutable; new handlers may be added. - * @return list of handlers for the SDK. + * Create the execution interceptor(s) for this audit service. + * The list returned is mutable; new interceptors may be added. + * @return list of interceptors for the SDK. * @throws IOException failure. */ - List createRequestHandlers() throws IOException; + List createExecutionInterceptors() throws IOException; /** - * Return a transfer state change callback which + * Return a transfer callback which * fixes the active span context to be that in which - * the state change listener was created. + * the transfer listener was created. * This can be used to audit the creation of the multipart * upload initiation request which the transfer manager * makes when a file to be copied is split up. * This must be invoked/used within the active span. - * @return a state change listener. + * @return a transfer listener. */ - TransferStateChangeListener createStateChangeListener(); + TransferListener createTransferListener(); /** * Check for permission to access a path. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java index 1d76833f8ceab..55deb0a1989de 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/S3AAuditConstants.java @@ -66,13 +66,20 @@ private S3AAuditConstants() { "org.apache.hadoop.fs.s3a.audit.impl.NoopAuditor"; /** - * List of extra AWS SDK request handlers: {@value}. - * These are added to the SDK request chain after - * any audit service. + * Deprecated list of extra AWS SDK request handlers: {@value}. + * Use {@link #AUDIT_EXECUTION_INTERCEPTORS} instead. 
*/ public static final String AUDIT_REQUEST_HANDLERS = "fs.s3a.audit.request.handlers"; + /** + * List of extra AWS SDK execution interceptors: {@value}. + * These are added to the SDK request chain after + * any audit service. + */ + public static final String AUDIT_EXECUTION_INTERCEPTORS = + "fs.s3a.audit.execution.interceptors"; + /** * Should operations outside spans be rejected? * This is for testing coverage of the span code; if used diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java index 3d2102d305c7d..0a05a852a0fd7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java @@ -25,16 +25,6 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.HandlerContextAware; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.SdkBaseException; -import com.amazonaws.handlers.HandlerAfterAttemptContext; -import com.amazonaws.handlers.HandlerBeforeAttemptContext; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,16 +46,27 @@ import org.apache.hadoop.fs.s3a.audit.OperationAuditor; import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions; import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants; +import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.functional.FutureIO; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.SdkResponse; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.http.SdkHttpResponse; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.retrieveAttachedSpan; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; /** @@ -82,10 +83,11 @@ * will deactivate the wrapped span and then * switch the active span to the unbounded span. * - * The inner class {@link AWSAuditEventCallbacks} is returned - * as a request handler in {@link #createRequestHandlers()}; - * this forwards all requests to the outer {@code ActiveAuditManagerS3A}, - * which then locates the active span and forwards the request. 
+ * This class also implements {@link ExecutionInterceptor} and + * returns itself in {@link #createExecutionInterceptors()}; + * once registered with the S3 client, the implemented methods + * will be called during different parts of an SDK request lifecycle, + * which then locate the active span and forward the request. * If any such invocation raises an {@link AuditFailureException} * then the IOStatistics counter for {@code AUDIT_FAILURE} * is incremented. @@ -390,25 +392,32 @@ public AuditSpanS3A createSpan(final String operation, } /** - * Return a request handler for the AWS SDK which + * Return a list of execution interceptors for the AWS SDK which * relays to this class. - * @return a request handler. + * @return a list of execution interceptors. */ @Override - public List createRequestHandlers() + public List createExecutionInterceptors() throws IOException { // wire up the AWS SDK To call back into this class when // preparing to make S3 calls. - List requestHandlers = new ArrayList<>(); - requestHandlers.add(new SdkRequestHandler()); - // now look for any more handlers - final Class[] handlers = getConfig().getClasses(AUDIT_REQUEST_HANDLERS); + List executionInterceptors = new ArrayList<>(); + executionInterceptors.add(this); + + final String handlers = getConfig().get(AUDIT_REQUEST_HANDLERS); if (handlers != null) { - for (Class handler : handlers) { + V2Migration.v1RequestHandlersUsed(); + } + + // TODO: should we remove this and use Global/Service interceptors, see: + // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html + final Class[] interceptors = getConfig().getClasses(AUDIT_EXECUTION_INTERCEPTORS); + if (interceptors != null) { + for (Class handler : interceptors) { try { Constructor ctor = handler.getConstructor(); - requestHandlers.add((RequestHandler2)ctor.newInstance()); + executionInterceptors.add((ExecutionInterceptor) ctor.newInstance()); } catch (ExceptionInInitializerError e) { throw FutureIO.unwrapInnerException(e); } catch (Exception e) { @@ -416,13 +425,18 @@ public List createRequestHandlers() } } } - return requestHandlers; + return executionInterceptors; } @Override - public TransferStateChangeListener createStateChangeListener() { + public TransferListener createTransferListener() { final WrappingAuditSpan span = activeSpan(); - return (transfer, state) -> switchToActiveSpan(span); + return new TransferListener() { + @Override + public void transferInitiated(Context.TransferInitiated context) { + switchToActiveSpan(span); + } + }; } @Override @@ -434,20 +448,18 @@ public boolean checkAccess(final Path path, } /** - * Attach a reference to the active thread span, then - * invoke the same callback on that active thread. + * Audit the creation of a request and retrieve + * a reference to the active thread span. 
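+ * <p>
+ * Sketch of the changed contract (an illustration inferred from the new
+ * signature, not from SDK documentation): the callback now receives a
+ * mutable {@code SdkRequest.Builder} and returns {@code void}, so an
+ * implementation that needs to alter the request mutates the builder in
+ * place; the v1 callback instead returned a (possibly new) request object.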
*/ @Override - public T requestCreated( - final T request) { + public void requestCreated(final SdkRequest.Builder builder) { AuditSpanS3A span = getActiveAuditSpan(); if (LOG.isTraceEnabled()) { LOG.trace("Created Request {} in span {}", - analyzer.analyze(request), span); + analyzer.analyze(builder.build()), span); } - attachSpanToRequest(request, span); try { - return span.requestCreated(request); + span.requestCreated(builder); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -463,14 +475,13 @@ public T requestCreated( * {@inheritDoc} */ @Override - public T beforeExecution( - final T request) { + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { ioStatisticsStore.incrementCounter(AUDIT_REQUEST_EXECUTION.getSymbol()); - - // identify the span and invoke the callback + AuditSpanS3A span = getActiveAuditSpan(); + attachSpanToRequest(executionAttributes, span); try { - return extractAndActivateSpanFromRequest(request) - .beforeExecution(request); + span.beforeExecution(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -479,16 +490,14 @@ public T beforeExecution( /** * Forward to active span. - * @param request request - * @param response response. + * {@inheritDoc} */ @Override - public void afterResponse(final Request request, - final Response response) - throws AuditFailureException, SdkBaseException { + public void afterExecution(Context.AfterExecution context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .afterResponse(request, response); + extractAndActivateSpanFromRequest(context.request(), executionAttributes) + .afterExecution(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -496,18 +505,19 @@ public void afterResponse(final Request request, } /** - * Get the active span from the handler context, + * Get the active span from the execution attributes, * falling back to the active thread span if there - * is nothing in the context. - * Provided the span is a wrapped span, the + * is nothing in the attributes. + * Provided the span is a wrapped span, the span is + * activated. * @param request request - * @param type of request. - * @return the callbacks + * @param executionAttributes the execution attributes + * @return the active span */ - private AWSAuditEventCallbacks - extractAndActivateSpanFromRequest(final T request) { - AWSAuditEventCallbacks span; - span = retrieveAttachedSpan(request); + private AuditSpanS3A extractAndActivateSpanFromRequest( + final SdkRequest request, + final ExecutionAttributes executionAttributes) { + AuditSpanS3A span = retrieveAttachedSpan(executionAttributes); if (span == null) { // no span is attached. Not unusual for the copy operations, // or for calls to GetBucketLocation made by the AWS client @@ -531,17 +541,14 @@ public void afterResponse(final Request request, /** * Forward to active span. * @param request request - * @param response response. - * @param exception exception raised. 
- */ - @Override - public void afterError(final Request request, - final Response response, - final Exception exception) - throws AuditFailureException, SdkBaseException { + * {@inheritDoc} + */ + @Override + public void onExecutionFailure(Context.FailedExecution context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .afterError(request, response, exception); + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .onExecutionFailure(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -549,11 +556,12 @@ public void afterError(final Request request, } @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { + public SdkRequest modifyRequest(Context.ModifyRequest context, + ExecutionAttributes executionAttributes) { try { - return extractAndActivateSpanFromRequest(request) - .beforeMarshalling(request); + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyRequest(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -561,10 +569,12 @@ public AmazonWebServiceRequest beforeMarshalling( } @Override - public void beforeRequest(final Request request) { + public void beforeMarshalling(Context.BeforeMarshalling context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request) - .beforeRequest(request); + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .beforeMarshalling(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -572,10 +582,12 @@ public void beforeRequest(final Request request) { } @Override - public void beforeAttempt(final HandlerBeforeAttemptContext context) { + public void afterMarshalling(Context.AfterMarshalling context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(context.getRequest()) - .beforeAttempt(context); + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterMarshalling(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -583,10 +595,12 @@ public void beforeAttempt(final HandlerBeforeAttemptContext context) { } @Override - public void afterAttempt(final HandlerAfterAttemptContext context) { + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(context.getRequest()) - .afterAttempt(context); + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyHttpRequest(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; @@ -594,73 +608,80 @@ public void afterAttempt(final HandlerAfterAttemptContext context) { } @Override - public HttpResponse beforeUnmarshalling(final Request request, - final HttpResponse httpResponse) { + public void beforeTransmission(Context.BeforeTransmission context, + ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(request.getOriginalRequest()) - .beforeUnmarshalling(request, httpResponse); + extractAndActivateSpanFromRequest(context.request(), +
executionAttributes) + .beforeTransmission(context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; } - return httpResponse; } - /** - * Callbacks from the AWS SDK; all forward to the ActiveAuditManagerS3A. - * We need a separate class because the SDK requires the handler list - * to be list of {@code RequestHandler2} instances. - */ - private class SdkRequestHandler extends RequestHandler2 { - - @Override - public AmazonWebServiceRequest beforeExecution( - final AmazonWebServiceRequest request) { - return ActiveAuditManagerS3A.this.beforeExecution(request); - } - - @Override - public void afterResponse(final Request request, - final Response response) { - ActiveAuditManagerS3A.this.afterResponse(request, response); - } - - @Override - public void afterError(final Request request, - final Response response, - final Exception e) { - ActiveAuditManagerS3A.this.afterError(request, response, e); - } - - @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { - return ActiveAuditManagerS3A.this.beforeMarshalling(request); + @Override + public void afterTransmission(Context.AfterTransmission context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterTransmission(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void beforeRequest(final Request request) { - ActiveAuditManagerS3A.this.beforeRequest(request); + @Override + public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context, + ExecutionAttributes executionAttributes) { + try { + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .modifyHttpResponse(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void beforeAttempt( - final HandlerBeforeAttemptContext context) { - ActiveAuditManagerS3A.this.beforeAttempt(context); + @Override + public void beforeUnmarshalling(Context.BeforeUnmarshalling context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .beforeUnmarshalling(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public HttpResponse beforeUnmarshalling( - final Request request, - final HttpResponse httpResponse) { - return ActiveAuditManagerS3A.this.beforeUnmarshalling(request, - httpResponse); + @Override + public void afterUnmarshalling(Context.AfterUnmarshalling context, + ExecutionAttributes executionAttributes) { + try { + extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + .afterUnmarshalling(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } + } - @Override - public void afterAttempt( - final HandlerAfterAttemptContext context) { - ActiveAuditManagerS3A.this.afterAttempt(context); + @Override + public SdkResponse modifyResponse(Context.ModifyResponse context, + ExecutionAttributes executionAttributes) { + try { + return extractAndActivateSpanFromRequest(context.request(), + executionAttributes) + 
.modifyResponse(context, executionAttributes); + } catch (AuditFailureException e) { + ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); + throw e; } } @@ -748,9 +769,8 @@ public void deactivate() { * {@inheritDoc} */ @Override - public T requestCreated( - final T request) { - return span.requestCreated(request); + public void requestCreated(final SdkRequest.Builder builder) { + span.requestCreated(builder); } /** @@ -774,79 +794,132 @@ public void set(final String key, final String value) { /** * Forward to the inner span. - * @param request request - * @param type of request - * @return an updated request. + * {@inheritDoc} + */ + @Override + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + span.beforeExecution(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void afterExecution(Context.AfterExecution context, + ExecutionAttributes executionAttributes) { + span.afterExecution(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void onExecutionFailure(Context.FailedExecution context, + ExecutionAttributes executionAttributes) { + span.onExecutionFailure(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void beforeMarshalling(Context.BeforeMarshalling context, + ExecutionAttributes executionAttributes) { + span.beforeMarshalling(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} */ @Override - public T beforeExecution( - final T request) { - return span.beforeExecution(request); + public SdkRequest modifyRequest(Context.ModifyRequest context, + ExecutionAttributes executionAttributes) { + return span.modifyRequest(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @param response response. + * {@inheritDoc} */ @Override - public void afterResponse(final Request request, - final Response response) { - span.afterResponse(request, response); + public void afterMarshalling(Context.AfterMarshalling context, + ExecutionAttributes executionAttributes) { + span.afterMarshalling(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @param response response. - * @param exception exception raised. + * {@inheritDoc} */ @Override - public void afterError(final Request request, - final Response response, - final Exception exception) { - span.afterError(request, response, exception); + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { + return span.modifyHttpRequest(context, executionAttributes); } /** * Forward to the inner span. - * @param request request - * @return request to marshall + * {@inheritDoc} */ @Override - public AmazonWebServiceRequest beforeMarshalling( - final AmazonWebServiceRequest request) { - return span.beforeMarshalling(request); + public void beforeTransmission(Context.BeforeTransmission context, + ExecutionAttributes executionAttributes) { + span.beforeTransmission(context, executionAttributes); } /** * Forward to the inner span. 
- * @param request request + * {@inheritDoc} */ @Override - public void beforeRequest(final Request request) { - span.beforeRequest(request); + public void afterTransmission(Context.AfterTransmission context, + ExecutionAttributes executionAttributes) { + span.afterTransmission(context, executionAttributes); } /** * Forward to the inner span. - * @param context full context, including the request. + * {@inheritDoc} */ @Override - public void beforeAttempt( - final HandlerBeforeAttemptContext context) { - span.beforeAttempt(context); + public SdkHttpResponse modifyHttpResponse(Context.ModifyHttpResponse context, + ExecutionAttributes executionAttributes) { + return span.modifyHttpResponse(context, executionAttributes); } /** * Forward to the inner span. - * - * @param context full context, including the request. + * {@inheritDoc} */ @Override - public void afterAttempt( - final HandlerAfterAttemptContext context) { - span.afterAttempt(context); + public void beforeUnmarshalling(Context.BeforeUnmarshalling context, + ExecutionAttributes executionAttributes) { + span.beforeUnmarshalling(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public void afterUnmarshalling(Context.AfterUnmarshalling context, + ExecutionAttributes executionAttributes) { + span.afterUnmarshalling(context, executionAttributes); + } + + /** + * Forward to the inner span. + * {@inheritDoc} + */ + @Override + public SdkResponse modifyResponse(Context.ModifyResponse context, + ExecutionAttributes executionAttributes) { + return span.modifyResponse(context, executionAttributes); } @Override @@ -859,5 +932,4 @@ public String toString() { return sb.toString(); } } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java index fcf2015487c48..0dd0976acc511 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java @@ -24,10 +24,11 @@ import java.util.HashMap; import java.util.Map; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.http.SdkHttpRequest; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -260,21 +261,21 @@ private class LoggingAuditSpan extends AbstractAuditSpanImpl { * Attach Range of data for GetObject Request. * @param request given get object request */ - private void attachRangeFromRequest(AmazonWebServiceRequest request) { - if (request instanceof GetObjectRequest) { - long[] rangeValue = ((GetObjectRequest) request).getRange(); - if (rangeValue == null || rangeValue.length == 0) { - return; - } - if (rangeValue.length != 2) { - WARN_INCORRECT_RANGE.warn("Expected range to contain 0 or 2 elements." - + " Got {} elements. 
Ignoring.", rangeValue.length); - return; - } - String combinedRangeValue = String.format("%d-%d", rangeValue[0], rangeValue[1]); - referrer.set(AuditConstants.PARAM_RANGE, combinedRangeValue); - } - } +// private void attachRangeFromRequest(AmazonWebServiceRequest request) { +// if (request instanceof GetObjectRequest) { +// long[] rangeValue = ((GetObjectRequest) request).getRange(); +// if (rangeValue == null || rangeValue.length == 0) { +// return; +// } +// if (rangeValue.length != 2) { +// WARN_INCORRECT_RANGE.warn("Expected range to contain 0 or 2 elements." +// + " Got {} elements. Ignoring.", rangeValue.length); +// return; +// } +// String combinedRangeValue = String.format("%d-%d", rangeValue[0], rangeValue[1]); +// referrer.set(AuditConstants.PARAM_RANGE, combinedRangeValue); +// } +// } private final String description; @@ -346,48 +347,61 @@ public void set(final String key, final String value) { referrer.set(key, value); } + + /** - * Before execution, the logging auditor always builds - * the referrer header, saves to the outer class - * (where {@link #getLastHeader()} can retrieve it, + * Before transmitting a request, the logging auditor + * always builds the referrer header, saves to the outer + * class (where {@link #getLastHeader()} can retrieve it, * and logs at debug. * If configured to add the header to the S3 logs, it will * be set as the HTTP referrer. - * @param request request - * @param type of request. - * @return the request with any extra headers. + * @param context The current state of the execution, + * including the SDK and current HTTP request. + * @param executionAttributes A mutable set of attributes scoped + * to one specific request/response + * cycle that can be used to give data + * to future lifecycle methods. + * @return The potentially-modified HTTP request that should be + * sent to the service. Must not be null. */ @Override - public T beforeExecution( - final T request) { - // attach range for GetObject requests - attachRangeFromRequest(request); - // for delete op, attach the number of files to delete - attachDeleteKeySizeAttribute(request); + public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context, + ExecutionAttributes executionAttributes) { + SdkHttpRequest httpRequest = context.httpRequest(); + + // attach range for GetObject requests + attachRangeFromRequest(httpRequest, executionAttributes); + + // for delete op, attach the number of files to delete + attachDeleteKeySizeAttribute(request); + // build the referrer header final String header = referrer.buildHttpReferrer(); // update the outer class's field. 
setLastHeader(header); if (headerEnabled) { // add the referrer header - request.putCustomRequestHeader(HEADER_REFERRER, - header); + httpRequest = httpRequest.toBuilder() + .appendHeader(HEADER_REFERRER, header) + .build(); } if (LOG.isDebugEnabled()) { LOG.debug("[{}] {} Executing {} with {}; {}", currentThreadID(), getSpanId(), getOperationName(), - analyzer.analyze(request), + analyzer.analyze(context.request()), header); } + // now see if the request is actually a blocked multipart request - if (!isMultipartUploadEnabled && isRequestMultipartIO(request)) { + if (!isMultipartUploadEnabled && isRequestMultipartIO(httpRequest)) { throw new AuditOperationRejectedException("Multipart IO request " - + request + " rejected " + header); + + httpRequest + " rejected " + header); } - return request; + return httpRequest; } /** @@ -460,15 +474,13 @@ public boolean isValidSpan() { } @Override - public T requestCreated( - final T request) { + public void requestCreated(final SdkRequest.Builder builder) { String error = "Creating a request outside an audit span " - + analyzer.analyze(request); + + analyzer.analyze(builder.build()); LOG.info(error); if (LOG.isDebugEnabled()) { LOG.debug(error, new AuditFailureException("unaudited")); } - return request; } /** @@ -476,20 +488,22 @@ public T requestCreated( * increment the failure count. * Some requests (e.g. copy part) are not expected in spans due * to how they are executed; these do not trigger failures. - * @param request request - * @param type of request - * @return an updated request. - * @throws AuditFailureException if failure is enabled. + * @param context The current state of the execution, including + * the unmodified SDK request from the service + * client call. + * @param executionAttributes A mutable set of attributes scoped + * to one specific request/response + * cycle that can be used to give data + * to future lifecycle methods. 
*/ @Override - public T beforeExecution( - final T request) { - + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { String error = "executing a request outside an audit span " - + analyzer.analyze(request); + + analyzer.analyze(context.request()); final String unaudited = getSpanId() + " " + UNAUDITED_OPERATION + " " + error; - if (isRequestNotAlwaysInSpan(request)) { + if (isRequestNotAlwaysInSpan(context.request())) { // can get by auditing during a copy, so don't overreact LOG.debug(unaudited); } else { @@ -500,7 +514,7 @@ public T beforeExecution( } } // now hand off to the superclass for its normal preparation - return super.beforeExecution(request); + super.beforeExecution(context, executionAttributes); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java index d1ebd922e073d..26d2db55c5641 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java @@ -24,10 +24,6 @@ import java.util.List; import java.util.UUID; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.transfer.Transfer; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -39,6 +35,9 @@ import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions; import org.apache.hadoop.service.CompositeService; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; /** @@ -121,17 +120,13 @@ public AuditSpanS3A createSpan(final String operation, } @Override - public List createRequestHandlers() throws IOException { + public List createExecutionInterceptors() throws IOException { return new ArrayList<>(); } @Override - public TransferStateChangeListener createStateChangeListener() { - return new TransferStateChangeListener() { - public void transferStateChanged(final Transfer transfer, - final Transfer.TransferState state) { - } - }; + public TransferListener createTransferListener() { + return new TransferListener() {}; } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java index f82e3d7f1e5e8..c170a2be6611d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/S3AInternalAuditConstants.java @@ -18,10 +18,10 @@ package org.apache.hadoop.fs.s3a.audit.impl; -import com.amazonaws.handlers.HandlerContextKey; +import software.amazon.awssdk.core.interceptor.ExecutionAttribute; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks; +import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; /** * Internal constants; not intended for public use, or @@ -34,11 +34,11 @@ private S3AInternalAuditConstants() { } /** - * Handler key 
for audit span callbacks. - * This is used to bind the handler in the AWS code. + * Execution attribute for audit span callbacks. + * This is used to retrieve the span in the AWS code. */ - public static final HandlerContextKey - AUDIT_SPAN_HANDLER_CONTEXT = - new HandlerContextKey<>( - "org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks"); + public static final ExecutionAttribute + AUDIT_SPAN_EXECUTION_ATTRIBUTE = + new ExecutionAttribute<>( + "org.apache.hadoop.fs.s3a.audit.AuditSpanS3A"); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java index 1815285738b0e..4754427a4b118 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractAWSCredentialProvider.java @@ -21,7 +21,7 @@ import javax.annotation.Nullable; import java.net.URI; -import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.conf.Configuration; @@ -29,12 +29,9 @@ * Base class for AWS credential providers which * take a URI and config in their constructor. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ -@Deprecated public abstract class AbstractAWSCredentialProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { private final URI binding; @@ -65,10 +62,4 @@ public URI getUri() { return binding; } - /** - * Refresh is a no-op by default. - */ - @Override - public void refresh() { - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java index 5b1829e096123..365885cc70ab5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java @@ -23,29 +23,26 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; -import com.amazonaws.SdkBaseException; -import com.amazonaws.auth.AWSCredentials; import org.apache.hadoop.classification.VisibleForTesting; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.CredentialInitializationException; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.core.exception.SdkException; + /** * Base class for session credential support. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Private -@Deprecated public abstract class AbstractSessionCredentialsProvider extends AbstractAWSCredentialProvider { /** Credentials, created in {@link #init()}. */ - private volatile AWSCredentials awsCredentials; + private volatile AwsCredentials awsCredentials; /** Atomic flag for on-demand initialization.
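+   * A guard of the form {@code if (initialized.getAndSet(true)) return;}
+   * (a sketch only; the actual {@code init()} body is outside this hunk)
+   * ensures initialization runs at most once however many threads race.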
*/ private final AtomicBoolean initialized = new AtomicBoolean(false); @@ -105,7 +102,7 @@ public boolean isInitialized() { * @return the credentials * @throws IOException on any failure. */ - protected abstract AWSCredentials createCredentials(Configuration config) + protected abstract AwsCredentials createCredentials(Configuration config) throws IOException; /** @@ -115,10 +112,10 @@ protected abstract AWSCredentials createCredentials(Configuration config) * is thrown here before any attempt to return the credentials * is made. * @return credentials, if set. - * @throws SdkBaseException if one was raised during init + * @throws SdkException if one was raised during init * @throws CredentialInitializationException on other failures. */ - public AWSCredentials getCredentials() throws SdkBaseException { + public AwsCredentials resolveCredentials() throws SdkException { // do an on-demand init then raise an AWS SDK exception if // there was a failure. try { @@ -126,8 +123,8 @@ public AWSCredentials getCredentials() throws SdkBaseException { init(); } } catch (IOException e) { - if (e.getCause() instanceof SdkBaseException) { - throw (SdkBaseException) e.getCause(); + if (e.getCause() instanceof SdkException) { + throw (SdkException) e.getCause(); } else { throw new CredentialInitializationException(e.getMessage(), e); } @@ -165,15 +162,16 @@ public IOException getInitializationException() { * This will be interpreted as "this provider has no credentials to offer", * rather than an explicit error or anonymous access. */ - protected static final class NoCredentials implements AWSCredentials { + protected static final class NoCredentials implements AwsCredentials { @Override - public String getAWSAccessKeyId() { + public String accessKeyId() { return null; } @Override - public String getAWSSecretKey() { + public String secretAccessKey() { return null; } } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index 1e2ac16075aeb..eb32ed8afc8a1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -26,16 +26,18 @@ import java.util.Locale; import java.util.concurrent.TimeUnit; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import 
software.amazon.awssdk.services.sts.model.StsException; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -61,13 +63,10 @@ * * Classname is used in configuration files; do not move. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Evolving -@Deprecated -public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, +public class AssumedRoleCredentialProvider implements AwsCredentialsProvider, Closeable { private static final Logger LOG = @@ -78,7 +77,7 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, public static final String E_NO_ROLE = "Unset property " + ASSUMED_ROLE_ARN; - private final STSAssumeRoleSessionCredentialsProvider stsProvider; + private final StsAssumeRoleCredentialsProvider stsProvider; private final String sessionName; @@ -90,15 +89,17 @@ public class AssumedRoleCredentialProvider implements AWSCredentialsProvider, private final Invoker invoker; + private final StsClient stsClient; + /** * Instantiate. - * This calls {@link #getCredentials()} to fail fast on the inner + * This calls {@link #resolveCredentials()} to fail fast on the inner * role credential retrieval. * @param fsUri possibly null URI of the filesystem. * @param conf configuration * @throws IOException on IO problems and some parameter checking * @throws IllegalArgumentException invalid parameters - * @throws AWSSecurityTokenServiceException problems getting credentials + * @throws StsException problems getting credentials */ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) throws IOException { @@ -125,29 +126,31 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) String policy = conf.getTrimmed(ASSUMED_ROLE_POLICY, ""); LOG.debug("{}", this); - STSAssumeRoleSessionCredentialsProvider.Builder builder - = new STSAssumeRoleSessionCredentialsProvider.Builder(arn, sessionName); - builder.withRoleSessionDurationSeconds((int) duration); + + AssumeRoleRequest.Builder requestBuilder = + AssumeRoleRequest.builder().roleArn(arn).roleSessionName(sessionName) + .durationSeconds((int) duration); + if (StringUtils.isNotEmpty(policy)) { LOG.debug("Scope down policy {}", policy); - builder.withScopeDownPolicy(policy); + requestBuilder.policy(policy); } + String endpoint = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT, ""); String region = conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT_REGION, ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT); - AWSSecurityTokenServiceClientBuilder stsbuilder = + stsClient = STSClientFactory.builder( conf, fsUri != null ? fsUri.getHost() : "", credentialsToSTS, endpoint, - region); - // the STS client is not tracked for a shutdown in close(), because it - // (currently) throws an UnsupportedOperationException in shutdown(). - builder.withStsClient(stsbuilder.build()); + region).build(); //now build the provider - stsProvider = builder.build(); + stsProvider = StsAssumeRoleCredentialsProvider.builder() + .refreshRequest(requestBuilder.build()) + .stsClient(stsClient).build(); // to handle STS throttling by the AWS account, we // need to retry @@ -155,21 +158,21 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) // and force in a fail-fast check just to keep the stack traces less // convoluted - getCredentials(); + resolveCredentials(); } /** * Get credentials. 
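+   * <p>
+   * The lookup is wrapped by the shared invoker so STS throttling is
+   * retried before surfacing; in outline (names from this class):
+   * {@code invoker.retryUntranslated("getCredentials", true,
+   * stsProvider::resolveCredentials)}.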
* @return the credentials - * @throws AWSSecurityTokenServiceException if none could be obtained. + * @throws StsException if none could be obtained. */ @Override @Retries.RetryRaw - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { try { return invoker.retryUntranslated("getCredentials", true, - stsProvider::getCredentials); + stsProvider::resolveCredentials); } catch (IOException e) { // this is in the signature of retryUntranslated; // its hard to see how this could be raised, but for @@ -178,24 +181,19 @@ public AWSCredentials getCredentials() { throw new CredentialInitializationException( "getCredentials failed: " + e, e); - } catch (AWSSecurityTokenServiceException e) { + } catch (SdkClientException e) { LOG.error("Failed to get credentials for role {}", arn, e); throw e; } } - @Override - public void refresh() { - stsProvider.refresh(); - } - /** * Propagate the close() call to the inner stsProvider. */ @Override public void close() { - S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS); + S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS, stsClient); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java index ca9c518d30048..f505cfcab5d4f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java @@ -21,10 +21,11 @@ import java.io.Closeable; import java.io.IOException; -import com.amazonaws.AmazonClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; +import software.amazon.awssdk.core.exception.SdkClientException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -41,17 +42,14 @@ *
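+ * <p>
+ * Minimal usage sketch (default construction, as permitted by the
+ * public no-arg constructor below):
+ * <pre>
+ *   AwsCredentials credentials =
+ *       new IAMInstanceCredentialsProvider().resolveCredentials();
+ * </pre>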

* It is implicitly public; marked evolving as we can change its semantics. * - * @deprecated This class will be replaced by one that implements AWS SDK V2's AwsCredentialProvider - * as part of upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.Public @InterfaceStability.Evolving -@Deprecated public class IAMInstanceCredentialsProvider - implements AWSCredentialsProvider, Closeable { + implements AwsCredentialsProvider, Closeable { - private final AWSCredentialsProvider provider = - new EC2ContainerCredentialsProviderWrapper(); + private final AwsCredentialsProvider containerCredentialsProvider = + ContainerCredentialsProvider.builder().build(); public IAMInstanceCredentialsProvider() { } @@ -63,19 +61,29 @@ public IAMInstanceCredentialsProvider() { * @throws NoAwsCredentialsException on auth failure to indicate non-recoverable. */ @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { try { - return provider.getCredentials(); - } catch (AmazonClientException e) { + return getCredentials(); + } catch (SdkClientException e) { throw new NoAwsCredentialsException("IAMInstanceCredentialsProvider", e.getMessage(), e); } } - @Override - public void refresh() { - provider.refresh(); + /** + * First try {@link ContainerCredentialsProvider}, which will throw an exception if credentials + * cannot be retrieved from the container. Then resolve credentials + * using {@link InstanceProfileCredentialsProvider}. + * + * @return credentials + */ + private AwsCredentials getCredentials() { + try { + return containerCredentialsProvider.resolveCredentials(); + } catch (SdkClientException e) { + return InstanceProfileCredentialsProvider.create().resolveCredentials(); + } } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java index 29e815560a8a9..e91f8b0824020 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java @@ -24,19 +24,18 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.model.Credentials; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.model.Credentials; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; @@ -77,10 +76,10 @@ private MarshalledCredentialBinding() { public static 
MarshalledCredentials fromSTSCredentials( final Credentials credentials) { MarshalledCredentials marshalled = new MarshalledCredentials( - credentials.getAccessKeyId(), - credentials.getSecretAccessKey(), - credentials.getSessionToken()); - Date date = credentials.getExpiration(); + credentials.accessKeyId(), + credentials.secretAccessKey(), + credentials.sessionToken()); + // the expiration may be absent; convert to a Date only when present + Date date = credentials.expiration() == null + ? null : Date.from(credentials.expiration()); marshalled.setExpiration(date != null ? date.getTime() : 0); return marshalled; } @@ -91,11 +90,11 @@ public static MarshalledCredentials fromSTSCredentials( * @return a set of marshalled credentials. */ public static MarshalledCredentials fromAWSCredentials( - final AWSSessionCredentials credentials) { + final AwsSessionCredentials credentials) { return new MarshalledCredentials( - credentials.getAWSAccessKeyId(), - credentials.getAWSSecretKey(), - credentials.getSessionToken()); + credentials.accessKeyId(), + credentials.secretAccessKey(), + credentials.sessionToken()); } /** @@ -156,7 +155,7 @@ public static MarshalledCredentials fromFileSystem( * @throws NoAuthWithAWSException validation failure * @throws NoAwsCredentialsException the credentials are actually empty. */ - public static AWSCredentials toAWSCredentials( + public static AwsCredentials toAWSCredentials( final MarshalledCredentials marshalled, final MarshalledCredentials.CredentialTypeRequired typeRequired, final String component) @@ -173,18 +172,18 @@ public static AWSCredentials toAWSCredentials( final String secretKey = marshalled.getSecretKey(); if (marshalled.hasSessionToken()) { // a session token was supplied, so return session credentials - return new BasicSessionCredentials(accessKey, secretKey, + return AwsSessionCredentials.create(accessKey, secretKey, marshalled.getSessionToken()); } else { // these are full credentials - return new BasicAWSCredentials(accessKey, secretKey); + return AwsBasicCredentials.create(accessKey, secretKey); } } /** * Request a set of credentials from an STS endpoint. * @param parentCredentials the parent credentials needed to talk to STS - * @param awsConf AWS client configuration + * @param configuration AWS client configuration * @param stsEndpoint an endpoint, use "" for none * @param stsRegion region; use if the endpoint isn't the AWS default. * @param duration duration of the credentials in seconds. Minimum value: 900. @@ -194,25 +193,26 @@ public static AWSCredentials toAWSCredentials( @Retries.RetryTranslated public static MarshalledCredentials requestSessionCredentials( - final AWSCredentialsProvider parentCredentials, - final ClientConfiguration awsConf, + final AwsCredentialsProvider parentCredentials, + final Configuration configuration, final String stsEndpoint, final String stsRegion, final int duration, - final Invoker invoker) throws IOException { + final Invoker invoker, + final String bucket) throws IOException { try { - final AWSSecurityTokenService tokenService = + final StsClient tokenService = STSClientFactory.builder(parentCredentials, - awsConf, + configuration, stsEndpoint.isEmpty() ?
null : stsEndpoint, - stsRegion) + stsRegion, bucket) .build(); try (STSClientFactory.STSClient stsClient = STSClientFactory.createClientConnection( tokenService, invoker)) { return fromSTSCredentials(stsClient.requestSessionCredentials(duration, TimeUnit.SECONDS)); } - } catch (SdkClientException e) { + } catch (SdkException e) { if (stsRegion.isEmpty()) { LOG.error("Region must be provided when requesting session credentials.", e); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java index 8bd04744cd8c0..4bb5f65e14cb3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialProvider.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.net.URI; -import com.amazonaws.auth.AWSCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -40,7 +40,6 @@ */ @InterfaceAudience.Private @InterfaceStability.Unstable -@SuppressWarnings("deprecation") public class MarshalledCredentialProvider extends AbstractSessionCredentialsProvider { @@ -85,7 +84,7 @@ public MarshalledCredentialProvider( * @throws IOException on a failure */ @Override - protected AWSCredentials createCredentials(final Configuration config) + protected AwsCredentials createCredentials(final Configuration config) throws IOException { return toAWSCredentials(credentials, typeRequired, component); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java index 7ec13b092c9bc..8f92153b2e1d6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.s3a.CredentialInitializationException; /** - * A specific subclass of {@code AmazonClientException} which is + * A specific subclass of {@code SdkException} which is * used in the S3A retry policy to fail fast when there is any * authentication problem. 
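+ * <p>
+ * Illustrative throw site only (hypothetical, not from this patch):
+ * {@code throw new NoAuthWithAWSException("No AWS credentials", e)};
+ * because it extends {@code CredentialInitializationException}, the
+ * retry policy treats it as unrecoverable and fails fast.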
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java index 82d4fa588164d..ebd8ad9fddc98 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java @@ -20,28 +20,33 @@ import java.io.Closeable; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.AssumeRoleRequest; -import com.amazonaws.services.securitytoken.model.Credentials; -import com.amazonaws.services.securitytoken.model.GetSessionTokenRequest; +import org.apache.hadoop.fs.s3a.AWSClientConfig; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.Credentials; +import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest; +import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.s3a.S3AUtils; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.commons.lang3.StringUtils.isNotEmpty; @@ -71,17 +76,15 @@ public class STSClientFactory { * @return the builder to call {@code build()} * @throws IOException problem reading proxy secrets */ - public static AWSSecurityTokenServiceClientBuilder builder( + public static StsClientBuilder builder( final Configuration conf, final String bucket, - final AWSCredentialsProvider credentials) throws IOException { - final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket, - Constants.AWS_SERVICE_IDENTIFIER_STS); + final AwsCredentialsProvider credentials) throws IOException { String endpoint = conf.getTrimmed(DELEGATION_TOKEN_ENDPOINT, DEFAULT_DELEGATION_TOKEN_ENDPOINT); String region = conf.getTrimmed(DELEGATION_TOKEN_REGION, DEFAULT_DELEGATION_TOKEN_REGION); - return builder(credentials, awsConf, endpoint, region); + return builder(credentials, conf, endpoint, region, bucket); } /** @@ -96,37 +99,55 @@ public static AWSSecurityTokenServiceClientBuilder builder( * @return the builder to call {@code build()} * @throws IOException problem reading proxy secrets */ - 
public static AWSSecurityTokenServiceClientBuilder builder( + public static StsClientBuilder builder( final Configuration conf, final String bucket, - final AWSCredentialsProvider credentials, + final AwsCredentialsProvider credentials, final String stsEndpoint, final String stsRegion) throws IOException { - final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket, - Constants.AWS_SERVICE_IDENTIFIER_STS); - return builder(credentials, awsConf, stsEndpoint, stsRegion); + return builder(credentials, conf, stsEndpoint, stsRegion, bucket); } /** * Create the builder ready for any final configuration options. * Picks up connection settings from the Hadoop configuration, including * proxy secrets. - * @param awsConf AWS configuration. + * @param conf Hadoop configuration to read connection and proxy settings from. * @param credentials AWS credential chain to use * @param stsEndpoint optional endpoint "https://sts.us-west-1.amazonaws.com" * @param stsRegion the region, e.g. "us-west-1". Must be set if endpoint is. + * @param bucket optional bucket name, used to pick up per-bucket proxy secrets. * @return the builder to call {@code build()} + * @throws IOException problem reading proxy secrets */ - public static AWSSecurityTokenServiceClientBuilder builder( - final AWSCredentialsProvider credentials, - final ClientConfiguration awsConf, + public static StsClientBuilder builder( + final AwsCredentialsProvider credentials, + final Configuration conf, final String stsEndpoint, - final String stsRegion) { - final AWSSecurityTokenServiceClientBuilder builder - = AWSSecurityTokenServiceClientBuilder.standard(); + final String stsRegion, + final String bucket) throws IOException { + final StsClientBuilder stsClientBuilder = StsClient.builder(); + Preconditions.checkArgument(credentials != null, "No credentials"); - builder.withClientConfiguration(awsConf); - builder.withCredentials(credentials); + + final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = + AWSClientConfig.createClientConfigBuilder(conf); + + final ApacheHttpClient.Builder httpClientBuilder = + AWSClientConfig.createHttpClientBuilder(conf); + + final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); + + final ProxyConfiguration proxyConfig = + AWSClientConfig.createProxyConfiguration(conf, bucket); + + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); + httpClientBuilder.proxyConfiguration(proxyConfig); + + stsClientBuilder + .httpClientBuilder(httpClientBuilder) + .overrideConfiguration(clientOverrideConfigBuilder.build()) + .credentialsProvider(credentials); + + // TODO: SIGNERS NOT ADDED YET. boolean destIsStandardEndpoint = STS_STANDARD.equals(stsEndpoint); if (isNotEmpty(stsEndpoint) && !destIsStandardEndpoint) { Preconditions.checkArgument( @@ -134,26 +155,43 @@ public static AWSSecurityTokenServiceClientBuilder builder( "STS endpoint is set to %s but no signing region was provided", stsEndpoint); LOG.debug("STS Endpoint={}; region='{}'", stsEndpoint, stsRegion); - builder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration(stsEndpoint, stsRegion)); + stsClientBuilder.endpointOverride(getSTSEndpoint(stsEndpoint)) + .region(Region.of(stsRegion)); } else { Preconditions.checkArgument(isEmpty(stsRegion), "STS signing region set to %s but no STS endpoint specified", stsRegion); } - return builder; + return stsClientBuilder; } + /** + * Given an endpoint string, create the endpoint URI. + * + * @param endpoint possibly null endpoint. + * @return an endpoint uri + */ + private static URI getSTSEndpoint(String endpoint) { + try { + // TODO: The URI builder is currently imported via a shaded dependency.
This is due to the TM + // preview dependency causing some issues. + return new URIBuilder().setScheme("https").setHost(endpoint).build(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + + /** * Create an STS Client instance. - * @param tokenService STS instance + * @param stsClient STS instance * @param invoker invoker to use * @return an STS client bonded to that interface. */ public static STSClient createClientConnection( - final AWSSecurityTokenService tokenService, + final StsClient stsClient, final Invoker invoker) { - return new STSClient(tokenService, invoker); + return new STSClient(stsClient, invoker); } /** @@ -161,21 +199,19 @@ public static STSClient createClientConnection( */ public static final class STSClient implements Closeable { - private final AWSSecurityTokenService tokenService; + private final StsClient stsClient; private final Invoker invoker; - private STSClient(final AWSSecurityTokenService tokenService, + private STSClient(final StsClient stsClient, final Invoker invoker) { - this.tokenService = tokenService; + this.stsClient = stsClient; this.invoker = invoker; } @Override public void close() throws IOException { - // Since we are not using AbstractAWSSecurityTokenService, we - // don't need to worry about catching UnsupportedOperationException. - tokenService.shutdown(); + stsClient.close(); } /** @@ -192,13 +228,13 @@ public Credentials requestSessionCredentials( final TimeUnit timeUnit) throws IOException { int durationSeconds = (int) timeUnit.toSeconds(duration); LOG.debug("Requesting session token of duration {}", duration); - final GetSessionTokenRequest request = new GetSessionTokenRequest(); - request.setDurationSeconds(durationSeconds); + final GetSessionTokenRequest request = + GetSessionTokenRequest.builder().durationSeconds(durationSeconds).build(); return invoker.retry("request session credentials", "", true, () -> { LOG.info("Requesting Amazon STS Session credentials"); - return tokenService.getSessionToken(request).getCredentials(); + return stsClient.getSessionToken(request).credentials(); }); } @@ -222,15 +258,14 @@ public Credentials requestRole( final TimeUnit timeUnit) throws IOException { LOG.debug("Requesting role {} with duration {}; policy = {}", roleARN, duration, policy); - AssumeRoleRequest request = new AssumeRoleRequest(); - request.setDurationSeconds((int) timeUnit.toSeconds(duration)); - request.setRoleArn(roleARN); - request.setRoleSessionName(sessionName); + AssumeRoleRequest.Builder requestBuilder = + AssumeRoleRequest.builder().durationSeconds((int) timeUnit.toSeconds(duration)) + .roleArn(roleARN).roleSessionName(sessionName); if (isNotEmpty(policy)) { - request.setPolicy(policy); + requestBuilder.policy(policy); } return invoker.retry("request role credentials", "", true, - () -> tokenService.assumeRole(request).getCredentials()); + () -> stsClient.assumeRole(requestBuilder.build()).credentials()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java index 6526f9a947815..889e1e2c4af79 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java @@ -20,9 +20,6 @@ import java.util.Optional; -import
com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; - import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; /** @@ -35,37 +32,30 @@ public final class EncryptionSecretOperations { private EncryptionSecretOperations() { } - /** - * Create SSE-C client side key encryption options on demand. - * @return an optional key to attach to a request. + /** + * Gets the SSE-C client side key if present. + * * @param secrets source of the encryption secrets. + * @return an optional key to attach to a request. */ - public static Optional<SSECustomerKey> createSSECustomerKey( - final EncryptionSecrets secrets) { - if (secrets.hasEncryptionKey() && - secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) { - return Optional.of(new SSECustomerKey(secrets.getEncryptionKey())); + public static Optional<String> getSSECustomerKey(final EncryptionSecrets secrets) { + if (secrets.hasEncryptionKey() && secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) { + return Optional.of(secrets.getEncryptionKey()); } else { - return Optional.empty(); + return Optional.empty(); } } /** - * Create SSE-KMS options for a request, iff the encryption is SSE-KMS. - * @return an optional SSE-KMS param to attach to a request. + * Gets the SSE-KMS key if present; otherwise S3 falls back to its AWS-managed key. + * * @param secrets source of the encryption secrets. + * @return an optional key to attach to a request. */ - public static Optional<SSEAwsKeyManagementParams> createSSEAwsKeyManagementParams( - final EncryptionSecrets secrets) { - - //Use specified key, otherwise default to default master aws/s3 key by AWS - if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS) { - if (secrets.hasEncryptionKey()) { - return Optional.of(new SSEAwsKeyManagementParams( - secrets.getEncryptionKey())); - } else { - return Optional.of(new SSEAwsKeyManagementParams()); - } + public static Optional<String> getSSEAwsKMSKey(final EncryptionSecrets secrets) { + if (secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS + && secrets.hasEncryptionKey()) { + return Optional.of(secrets.getEncryptionKey()); } else { return Optional.empty(); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java index 9b06031d5866a..cb0cb64233aa1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java @@ -23,11 +23,11 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; -import com.amazonaws.services.securitytoken.model.Credentials; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java index bfb7e6966457b..73123a0d71ee4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java @@ -228,7 +228,10 @@ protected void serviceStop() throws Exception { try { super.serviceStop(); } finally { - ServiceOperations.stopQuietly(LOG, tokenBinding); + // TODO: Importing the transfer manager preview outside of the bundle causes some + // issues: at this point the reference to LOG can no longer be resolved. We expect + // this to be fixed with the release of the TM. + // ServiceOperations.stopQuietly(LOG, tokenBinding); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 2f0a71767edfb..434ec5b24670a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -26,21 +26,18 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import software.amazon.awssdk.services.sts.StsClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; -import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3ARetryPolicy; -import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialProvider; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentials; import org.apache.hadoop.fs.s3a.auth.RoleModel; @@ -292,23 +289,22 @@ private synchronized Optional maybeInitSTS() // chain. // As no codepath (session propagation, STS creation) will work, // throw this.
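Background for reviewers, not part of the change itself: the v1 `AWSSecurityTokenServiceClientBuilder.standard()` flow becomes a v2 `StsClient.builder()` flow throughout this patch. A minimal, self-contained sketch of that pattern follows; the credentials, endpoint, and region are illustrative placeholders, and real code should go through `STSClientFactory` instead.

```java
import java.net.URI;

import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.model.Credentials;
import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest;

public final class StsV2Sketch {
  public static void main(String[] args) {
    // endpointOverride() + region() replace the v1 EndpointConfiguration pair.
    try (StsClient sts = StsClient.builder()
        .credentialsProvider(StaticCredentialsProvider.create(
            AwsBasicCredentials.create("access-key", "secret-key"))) // placeholders
        .endpointOverride(URI.create("https://sts.us-west-1.amazonaws.com"))
        .region(Region.US_WEST_1)
        .build()) {
      // v2 requests are immutable: built once, no setters.
      Credentials session = sts.getSessionToken(
          GetSessionTokenRequest.builder().durationSeconds(900).build())
          .credentials();
      System.out.println("Session token expires at " + session.expiration());
    } // StsClient is AutoCloseable; close() replaces the v1 shutdown()
  }
}
```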
- final AWSCredentials parentCredentials = once("get credentials", + final AwsCredentials parentCredentials = once("get credentials", "", - () -> parentAuthChain.getCredentials()); - hasSessionCreds = parentCredentials instanceof AWSSessionCredentials; + () -> parentAuthChain.resolveCredentials()); + hasSessionCreds = parentCredentials instanceof AwsSessionCredentials; if (!hasSessionCreds) { LOG.debug("Creating STS client for {}", getDescription()); invoker = new Invoker(new S3ARetryPolicy(conf), LOG_EVENT); - ClientConfiguration awsConf = - S3AUtils.createAwsConf(conf, uri.getHost(), - Constants.AWS_SERVICE_IDENTIFIER_STS); - AWSSecurityTokenService tokenService = + + StsClient tokenService = STSClientFactory.builder(parentAuthChain, - awsConf, + conf, endpoint, - region) + region, + uri.getHost()) .build(); stsClient = Optional.of( STSClientFactory.createClientConnection(tokenService, invoker)); @@ -374,11 +370,11 @@ public SessionTokenIdentifier createTokenIdentifier( + " -duration unknown", getCanonicalUri()); } origin += " " + CREDENTIALS_CONVERTED_TO_DELEGATION_TOKEN; - final AWSCredentials awsCredentials - = parentAuthChain.getCredentials(); - if (awsCredentials instanceof AWSSessionCredentials) { + final AwsCredentials awsCredentials + = parentAuthChain.resolveCredentials(); + if (awsCredentials instanceof AwsSessionCredentials) { marshalledCredentials = fromAWSCredentials( - (AWSSessionCredentials) awsCredentials); + (AwsSessionCredentials) awsCredentials); } else { throw new DelegationTokenIOException( "AWS Authentication chain is no longer supplying session secrets"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java index e53c690431ee0..54d3bc2e24e72 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java @@ -27,10 +27,11 @@ import java.util.List; import java.util.UUID; -import com.amazonaws.services.s3.model.MultipartUpload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.MultipartUpload; + import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; @@ -975,7 +976,7 @@ protected void abortPendingUploadsInCleanup( .executeWith(commitContext.getOuterSubmitter()) .suppressExceptions(suppressExceptions) .run(u -> commitContext.abortMultipartCommit( - u.getKey(), u.getUploadId())); + u.key(), u.uploadId())); } else { LOG.info("No pending uploads were found"); } @@ -1300,8 +1301,8 @@ protected void warnOnActiveUploads(final Path path) { DateFormat df = DateFormat.getDateTimeInstance(); pending.forEach(u -> LOG.info("[{}] {}", - df.format(u.getInitiated()), - u.getKey())); + df.format(Date.from(u.initiated())), + u.key())); if (shouldAbortUploadsInCleanup()) { LOG.warn("This committer will abort these uploads in job cleanup"); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java index 10440f77e7277..6c3cf3942d527 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/PutTracker.java 
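Background for reviewers, not part of the change itself: the committer files below replace v1 `PartETag` with v2 `CompletedPart` everywhere a multipart upload is completed. A sketch of the mapping under assumed inputs (the class and method names here are hypothetical):

```java
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload;
import software.amazon.awssdk.services.s3.model.CompletedPart;

public final class CompletedPartSketch {
  // v1: new PartETag(partNumber, etag). v2: an immutable builder-built value,
  // aggregated into CompletedMultipartUpload when the upload is completed.
  public static CompleteMultipartUploadRequest completeRequest(
      String bucket, String key, String uploadId, List<String> etags) {
    List<CompletedPart> parts = IntStream.range(0, etags.size())
        .mapToObj(i -> CompletedPart.builder()
            .partNumber(i + 1)        // part numbers are 1-based
            .eTag(etags.get(i))
            .build())
        .collect(Collectors.toList());
    return CompleteMultipartUploadRequest.builder()
        .bucket(bucket).key(key).uploadId(uploadId)
        .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build())
        .build();
  }
}
```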
@@ -21,7 +21,7 @@ import java.io.IOException; import java.util.List; -import com.amazonaws.services.s3.model.PartETag; +import software.amazon.awssdk.services.s3.model.CompletedPart; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.statistics.IOStatistics; @@ -76,7 +76,7 @@ public boolean outputImmediatelyVisible() { * @throws IOException I/O problem or validation failure. */ public boolean aboutToComplete(String uploadId, - List<PartETag> parts, + List<CompletedPart> parts, long bytesWritten, final IOStatistics iostatistics) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java index 77c3fed11fb24..8801c8bdce797 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java @@ -31,9 +31,10 @@ import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.PartETag; import com.fasterxml.jackson.annotation.JsonProperty; +import software.amazon.awssdk.services.s3.model.CompletedPart; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -215,13 +216,13 @@ public void touch(long millis) { * @param parts ordered list of etags. * @throws ValidationFailure if the data is invalid */ - public void bindCommitData(List<PartETag> parts) throws ValidationFailure { + public void bindCommitData(List<CompletedPart> parts) throws ValidationFailure { etags = new ArrayList<>(parts.size()); int counter = 1; - for (PartETag part : parts) { - verify(part.getPartNumber() == counter, - "Expected part number %s but got %s", counter, part.getPartNumber()); - etags.add(part.getETag()); + for (CompletedPart part : parts) { + verify(part.partNumber() == counter, + "Expected part number %s but got %s", counter, part.partNumber()); + etags.add(part.eTag()); counter++; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java index ef56d82978158..0dc0db24bafcd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java @@ -21,6 +21,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -32,13 +34,15 @@ import javax.annotation.Nullable; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import org.apache.commons.lang3.tuple.Pair; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -155,9 +159,9 @@ public CommitOperations(S3AFileSystem fs, * @param tagIds list of tags * @return same list, now in numbered tuples */ - public static List<PartETag> toPartEtags(List<String> tagIds) { + public static List<CompletedPart> toPartEtags(List<String> tagIds) { return IntStream.range(0, tagIds.size()) - .mapToObj(i -> new PartETag(i + 1, tagIds.get(i))) + .mapToObj(i -> CompletedPart.builder().partNumber(i + 1).eTag(tagIds.get(i)).build()) .collect(Collectors.toList()); } @@ -566,26 +570,30 @@ public SinglePendingCommit uploadFileToPendingCommit(File localFile, numParts, length)); } - List<PartETag> parts = new ArrayList<>((int) numParts); + List<CompletedPart> parts = new ArrayList<>((int) numParts); LOG.debug("File size is {}, number of parts to upload = {}", length, numParts); - for (int partNumber = 1; partNumber <= numParts; partNumber += 1) { - progress.progress(); - long size = Math.min(length - offset, uploadPartSize); - UploadPartRequest part; - part = writeOperations.newUploadPartRequest( - destKey, - uploadId, - partNumber, - (int) size, - null, - localFile, - offset); - part.setLastPart(partNumber == numParts); - UploadPartResult partResult = writeOperations.uploadPart(part, statistics); - offset += uploadPartSize; - parts.add(partResult.getPartETag()); + + // Open the file to upload. + try (InputStream fileStream = Files.newInputStream(localFile.toPath())) { + for (int partNumber = 1; partNumber <= numParts; partNumber += 1) { + progress.progress(); + long size = Math.min(length - offset, uploadPartSize); + UploadPartRequest part = writeOperations.newUploadPartRequestBuilder( + destKey, + uploadId, + partNumber, + size).build(); + // Read from the file input stream at current position. + RequestBody body = RequestBody.fromInputStream(fileStream, size); + UploadPartResponse response = writeOperations.uploadPart(part, body, statistics); + offset += uploadPartSize; + parts.add(CompletedPart.builder() + .partNumber(partNumber) + .eTag(response.eTag()) + .build()); + } } commitData.bindCommitData(parts); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java index 1a5451df801dd..135adf0de39a2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java @@ -20,19 +20,22 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.HashMap; import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3ADataBlocks; import org.apache.hadoop.fs.s3a.WriteOperationHelper; import org.apache.hadoop.fs.s3a.commit.PutTracker; import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; @@ -125,7 +128,7 @@ public boolean outputImmediatelyVisible() { */ @Override public boolean aboutToComplete(String uploadId, - 
List<PartETag> parts, + List<CompletedPart> parts, long bytesWritten, final IOStatistics iostatistics) throws IOException { @@ -144,10 +147,9 @@ public boolean aboutToComplete(String uploadId, headers.put(X_HEADER_MAGIC_MARKER, Long.toString(bytesWritten)); PutObjectRequest originalDestPut = writer.createPutObjectRequest( originalDestKey, - new ByteArrayInputStream(EMPTY), 0, - new PutObjectOptions(true, null, headers)); - upload(originalDestPut); + new PutObjectOptions(true, null, headers), false); + upload(originalDestPut, new ByteArrayInputStream(EMPTY)); // build the commit summary SinglePendingCommit commitData = new SinglePendingCommit(); @@ -170,22 +172,22 @@ public boolean aboutToComplete(String uploadId, path, pendingPartKey, commitData); PutObjectRequest put = writer.createPutObjectRequest( pendingPartKey, - new ByteArrayInputStream(bytes), - bytes.length, null); - upload(put); + bytes.length, null, false); + upload(put, new ByteArrayInputStream(bytes)); return false; } /** * PUT an object. * @param request the request + * @param inputStream input stream of data to be uploaded * @throws IOException on problems */ @Retries.RetryTranslated - private void upload(PutObjectRequest request) throws IOException { - trackDurationOfInvocation(trackerStatistics, - COMMITTER_MAGIC_MARKER_PUT.getSymbol(), () -> - writer.putObject(request, PutObjectOptions.keepingDirs(), null)); + private void upload(PutObjectRequest request, InputStream inputStream) throws IOException { + trackDurationOfInvocation(trackerStatistics, COMMITTER_MAGIC_MARKER_PUT.getSymbol(), + () -> writer.putObject(request, PutObjectOptions.keepingDirs(), + new S3ADataBlocks.BlockUploadData(inputStream), false, null)); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java index 4169a9899cb15..f076a4b701e00 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java @@ -20,8 +20,6 @@ import java.util.List; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +28,10 @@ import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException; import static org.apache.hadoop.fs.s3a.Statistic.IGNORED_ERRORS; import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_THROTTLED; @@ -113,15 +115,15 @@ public void bulkDeleteRetried( * @param deleteRequest request which failed. 
*/ private void onDeleteThrottled(final DeleteObjectsRequest deleteRequest) { - final List<DeleteObjectsRequest.KeyVersion> keys = deleteRequest.getKeys(); + final List<ObjectIdentifier> keys = deleteRequest.delete().objects(); final int size = keys.size(); incrementStatistic(STORE_IO_THROTTLED, size); instrumentation.addValueToQuantiles(STORE_IO_THROTTLE_RATE, size); THROTTLE_LOG.info( "Bulk delete {} keys throttled -first key = {}; last = {}", size, - keys.get(0).getKey(), - keys.get(size - 1).getKey()); + keys.get(0).key(), + keys.get(size - 1).key()); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index dcb538dc668de..f9d673e657966 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -20,15 +20,17 @@ import java.util.Locale; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.transfer.model.CopyResult; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -211,11 +213,24 @@ public String toString() { * null if the attribute is unavailable (such as when the policy says to use * versionId but object versioning is not enabled for the bucket). */ - public abstract String getRevisionId(ObjectMetadata objectMetadata, + public abstract String getRevisionId(HeadObjectResponse objectMetadata, String uri); /** - * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the + * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the + * revision identifier from {@link GetObjectResponse}. + * + * @param getObjectResponse the response instance + * @param uri the URI of the object + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). + */ + public abstract String getRevisionId(GetObjectResponse getObjectResponse, + String uri); + + /** + * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the * revision identifier from {@link S3ObjectAttributes}. * * @param s3Attributes the object attributes @@ -226,44 +241,44 @@ public abstract String getRevisionId(ObjectMetadata objectMetadata, public abstract String getRevisionId(S3ObjectAttributes s3Attributes); /** - * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the - * revision identifier from {@link CopyResult}. 
+ * Like {{@link #getRevisionId(HeadObjectResponse, String)}}, but retrieves the + * revision identifier from {@link CopyObjectResponse}. * - * @param copyResult the copy result + * @param copyObjectResponse the copy response * @return the revisionId string as interpreted by this policy, or potentially * null if the attribute is unavailable (such as when the policy says to use * versionId but object versioning is not enabled for the bucket). */ - public abstract String getRevisionId(CopyResult copyResult); + public abstract String getRevisionId(CopyObjectResponse copyObjectResponse); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} - * as a server-side qualification on the {@code GetObjectRequest}. + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} + * as a server-side qualification on the {@code GetObjectRequest.Builder}. * * @param request the request * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(GetObjectRequest request, + public abstract void applyRevisionConstraint(GetObjectRequest.Builder request, String revisionId); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} * as a server-side qualification on the {@code CopyObjectRequest}. * - * @param request the request + * @param requestBuilder the copy object request builder * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(CopyObjectRequest request, + public abstract void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId); /** - * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * Applies the given {@link #getRevisionId(HeadObjectResponse, String) revisionId} * as a server-side qualification on the {@code GetObjectMetadataRequest}. * - * @param request the request + * @param requestBuilder the head object request builder * @param revisionId the revision id */ - public abstract void applyRevisionConstraint(GetObjectMetadataRequest request, + public abstract void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId); /** @@ -323,7 +338,7 @@ public ImmutablePair onChangeDetected( } /** - * Change detection policy based on {@link ObjectMetadata#getETag() eTag}. + * Change detection policy based on {@link HeadObjectResponse#eTag() eTag}. 
*/ static class ETagChangeDetectionPolicy extends ChangeDetectionPolicy { @@ -332,8 +347,13 @@ static class ETagChangeDetectionPolicy extends ChangeDetectionPolicy { } @Override - public String getRevisionId(ObjectMetadata objectMetadata, String uri) { - return objectMetadata.getETag(); + public String getRevisionId(GetObjectResponse objectMetadata, String uri) { + return objectMetadata.eTag(); + } + + @Override + public String getRevisionId(HeadObjectResponse objectMetadata, String uri) { + return objectMetadata.eTag(); } @Override @@ -342,34 +362,34 @@ public String getRevisionId(S3ObjectAttributes s3Attributes) { } @Override - public String getRevisionId(CopyResult copyResult) { - return copyResult.getETag(); + public String getRevisionId(CopyObjectResponse copyObjectResponse) { + return copyObjectResponse.copyObjectResult().eTag(); } @Override - public void applyRevisionConstraint(GetObjectRequest request, + public void applyRevisionConstraint(GetObjectRequest.Builder builder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting get request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + builder.ifMatch(revisionId); } else { LOG.debug("No etag revision ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting copy request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + requestBuilder.copySourceIfMatch(revisionId); } else { LOG.debug("No etag revision ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { LOG.debug("Unable to restrict HEAD request to etag; will check later"); } @@ -388,7 +408,7 @@ public String toString() { /** * Change detection policy based on - * {@link ObjectMetadata#getVersionId() versionId}. + * {@link HeadObjectResponse#versionId() versionId}. */ static class VersionIdChangeDetectionPolicy extends ChangeDetectionPolicy { @@ -398,8 +418,16 @@ static class VersionIdChangeDetectionPolicy extends } @Override - public String getRevisionId(ObjectMetadata objectMetadata, String uri) { - String versionId = objectMetadata.getVersionId(); + public String getRevisionId(HeadObjectResponse objectMetadata, String uri) { + return logIfNull(objectMetadata.versionId(), uri); + } + + @Override + public String getRevisionId(GetObjectResponse getObjectResponse, String uri) { + return logIfNull(getObjectResponse.versionId(), uri); + } + + private String logIfNull(String versionId, String uri) { if (versionId == null) { // this policy doesn't work if the bucket doesn't have object versioning // enabled (which isn't by default) @@ -407,8 +435,7 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) { CHANGE_DETECT_MODE + " set to " + Source.VersionId + " but no versionId available while reading {}. " + "Ensure your bucket has object versioning enabled. 
" - + "You may see inconsistent reads.", - uri); + + "You may see inconsistent reads.", uri); } return versionId; } @@ -419,38 +446,38 @@ public String getRevisionId(S3ObjectAttributes s3Attributes) { } @Override - public String getRevisionId(CopyResult copyResult) { - return copyResult.getVersionId(); + public String getRevisionId(CopyObjectResponse copyObjectResponse) { + return copyObjectResponse.versionId(); } @Override - public void applyRevisionConstraint(GetObjectRequest request, + public void applyRevisionConstraint(GetObjectRequest.Builder builder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting get request to version {}", revisionId); - request.withVersionId(revisionId); + builder.versionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting copy request to version {}", revisionId); - request.withSourceVersionId(revisionId); + requestBuilder.sourceVersionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { if (revisionId != null) { LOG.debug("Restricting metadata request to version {}", revisionId); - request.withVersionId(revisionId); + requestBuilder.versionId(revisionId); } else { LOG.debug("No version ID to use as a constraint"); } @@ -482,7 +509,13 @@ public Source getSource() { } @Override - public String getRevisionId(final ObjectMetadata objectMetadata, + public String getRevisionId(final GetObjectResponse objectMetadata, + final String uri) { + return null; + } + + @Override + public String getRevisionId(final HeadObjectResponse objectMetadata, final String uri) { return null; } @@ -493,24 +526,24 @@ public String getRevisionId(final S3ObjectAttributes s3ObjectAttributes) { } @Override - public String getRevisionId(CopyResult copyResult) { + public String getRevisionId(CopyObjectResponse copyObjectResponse) { return null; } @Override - public void applyRevisionConstraint(final GetObjectRequest request, + public void applyRevisionConstraint(final GetObjectRequest.Builder builder, final String revisionId) { } @Override - public void applyRevisionConstraint(CopyObjectRequest request, + public void applyRevisionConstraint(CopyObjectRequest.Builder requestBuilder, String revisionId) { } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, + public void applyRevisionConstraint(HeadObjectRequest.Builder requestBuilder, String revisionId) { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index e7dd75c581131..6020f979fa3ee 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -18,18 +18,19 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; 
-import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.transfer.model.CopyResult; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -39,6 +40,7 @@ import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.statistics.ChangeTrackerStatistics; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_412_PRECONDITION_FAILED; import static org.apache.hadoop.util.Preconditions.checkNotNull; /** @@ -55,8 +57,6 @@ public class ChangeTracker { private static final Logger LOG = LoggerFactory.getLogger(ChangeTracker.class); - /** {@code 412 Precondition Failed} (HTTP/1.1 - RFC 2616) */ - public static final int SC_PRECONDITION_FAILED = 412; public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3"; /** Policy to use. */ @@ -117,15 +117,15 @@ public long getVersionMismatches() { /** * Apply any revision control set by the policy if it is to be * enforced on the server. - * @param request request to modify + * @param builder request builder to modify * @return true iff a constraint was added. */ public boolean maybeApplyConstraint( - final GetObjectRequest request) { + final GetObjectRequest.Builder builder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(builder, revisionId); return true; } return false; @@ -134,26 +134,26 @@ public boolean maybeApplyConstraint( /** * Apply any revision control set by the policy if it is to be * enforced on the server. - * @param request request to modify + * @param requestBuilder copy object request builder to modify * @return true iff a constraint was added. */ public boolean maybeApplyConstraint( - final CopyObjectRequest request) { + final CopyObjectRequest.Builder requestBuilder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(requestBuilder, revisionId); return true; } return false; } public boolean maybeApplyConstraint( - final GetObjectMetadataRequest request) { + final HeadObjectRequest.Builder requestBuilder) { if (policy.getMode() == ChangeDetectionPolicy.Mode.Server && revisionId != null) { - policy.applyRevisionConstraint(request, revisionId); + policy.applyRevisionConstraint(requestBuilder, revisionId); return true; } return false; @@ -168,7 +168,7 @@ public boolean maybeApplyConstraint( * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. 
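Background for reviewers, not part of the change itself: ChangeDetectionPolicy and ChangeTracker cooperate to pin reads to a previously observed revision. A compact sketch of the server-side constraint this amounts to with the v2 builders (bucket, key, and revision values are placeholders):

```java
import software.amazon.awssdk.services.s3.model.GetObjectRequest;

public final class RevisionConstraintSketch {
  // Attach the previously observed revision to a GET as a precondition:
  // ifMatch() makes S3 fail with 412 Precondition Failed if the etag changed;
  // versionId() pins the read to one object version.
  public static GetObjectRequest constrainedGet(
      String bucket, String key, String etag, String versionId) {
    GetObjectRequest.Builder builder = GetObjectRequest.builder()
        .bucket(bucket)
        .key(key);
    if (etag != null) {
      builder.ifMatch(etag);
    }
    if (versionId != null) {
      builder.versionId(versionId);
    }
    return builder.build();
  }
}
```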
*/ - public void processResponse(final S3Object object, + public void processResponse(final GetObjectResponse object, final String operation, final long pos) throws PathIOException { if (object == null) { @@ -191,30 +191,32 @@ public void processResponse(final S3Object object, } } - processMetadata(object.getObjectMetadata(), operation); + processMetadata(object, operation); } /** * Process the response from the server for validation against the * change policy. - * @param copyResult result of a copy operation + * @param copyObjectResponse response of a copy operation * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. */ - public void processResponse(final CopyResult copyResult) + public void processResponse(final CopyObjectResponse copyObjectResponse) throws PathIOException { // ETag (sometimes, depending on encryption and/or multipart) is not the // same on the copied object as the original. Version Id seems to never // be the same on the copy. As such, there isn't really anything that // can be verified on the response, except that a revision ID is present // if required. - String newRevisionId = policy.getRevisionId(copyResult); - LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); - if (newRevisionId == null && policy.isRequireVersion()) { - throw new NoVersionAttributeException(uri, String.format( - "Change detection policy requires %s", - policy.getSource())); - } + // TODO: Commenting out temporarily, due to the TM not returning copyObjectResult + // in the response. +// String newRevisionId = policy.getRevisionId(copyObjectResponse); +// LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); +// if (newRevisionId == null && policy.isRequireVersion()) { +// throw new NoVersionAttributeException(uri, String.format( +// "Change detection policy requires %s", +// policy.getSource())); +// } } /** @@ -227,13 +229,14 @@ public void processResponse(final CopyResult copyResult) * generated (e.g. "copy", "read", "select"). * @throws RemoteFileChangedException if the remote file has changed. */ - public void processException(SdkBaseException e, String operation) throws + public void processException(SdkException e, String operation) throws RemoteFileChangedException { - if (e instanceof AmazonServiceException) { - AmazonServiceException serviceException = (AmazonServiceException) e; - // This isn't really going to be hit due to + if (e instanceof AwsServiceException) { + AwsServiceException serviceException = (AwsServiceException)e; + // TODO: Verify whether this is fixed in SDK v2. + // In SDK v1, this wasn't really going to be hit due to // https://github.com/aws/aws-sdk-java/issues/1644 - if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) { + if (serviceException.statusCode() == SC_412_PRECONDITION_FAILED) { versionMismatches.versionMismatchError(); throw new RemoteFileChangedException(uri, operation, String.format( RemoteFileChangedException.PRECONDITIONS_FAILED @@ -254,12 +257,26 @@ public void processException(SdkBaseException e, String operation) throws * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. 
*/ - public void processMetadata(final ObjectMetadata metadata, + public void processMetadata(final HeadObjectResponse metadata, final String operation) throws PathIOException { final String newRevisionId = policy.getRevisionId(metadata, uri); processNewRevision(newRevisionId, operation, -1); } + /** + * Process the response from server for validation against the change + * policy. + * @param getObjectResponse response returned from server + * @param operation operation in progress + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processMetadata(final GetObjectResponse getObjectResponse, + final String operation) throws PathIOException { + final String newRevisionId = policy.getRevisionId(getObjectResponse, uri); + processNewRevision(newRevisionId, operation, -1); + } + /** * Validate a revision from the server against our expectations. * @param newRevisionId new revision. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java deleted file mode 100644 index 16459ac45b850..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.impl; - -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.transfer.Copy; -import com.amazonaws.services.s3.transfer.model.CopyResult; - -/** - * Extracts the outcome of a TransferManager-executed copy operation. - */ -public final class CopyOutcome { - - /** - * Result of a successful copy. - */ - private final CopyResult copyResult; - - /** the copy was interrupted. */ - private final InterruptedException interruptedException; - - /** - * The copy raised an AWS Exception of some form. - */ - private final SdkBaseException awsException; - - public CopyOutcome(CopyResult copyResult, - InterruptedException interruptedException, - SdkBaseException awsException) { - this.copyResult = copyResult; - this.interruptedException = interruptedException; - this.awsException = awsException; - } - - public CopyResult getCopyResult() { - return copyResult; - } - - public InterruptedException getInterruptedException() { - return interruptedException; - } - - public SdkBaseException getAwsException() { - return awsException; - } - - /** - * Calls {@code Copy.waitForCopyResult()} to await the result, converts - * it to a copy outcome. - * Exceptions caught and - * @param copy the copy operation. - * @return the outcome. 
- */ - public static CopyOutcome waitForCopy(Copy copy) { - try { - CopyResult result = copy.waitForCopyResult(); - return new CopyOutcome(result, null, null); - } catch (SdkBaseException e) { - return new CopyOutcome(null, null, e); - } catch (InterruptedException e) { - return new CopyOutcome(null, e, null); - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java index a45bfe46f169f..8f919897372ad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java @@ -24,7 +24,6 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; @@ -40,6 +39,8 @@ import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.util.DurationInfo; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; import static org.apache.hadoop.util.Preconditions.checkArgument; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.maybeAwaitCompletion; @@ -386,9 +387,9 @@ private void asyncDeleteAction( "Delete page of %d keys", keyList.size())) { if (!keyList.isEmpty()) { // first delete the files. - List<DeleteObjectsRequest.KeyVersion> files = keyList.stream() + List<ObjectIdentifier> files = keyList.stream() .filter(e -> !e.isDirMarker) - .map(e -> e.keyVersion) + .map(e -> e.objectIdentifier) .collect(Collectors.toList()); LOG.debug("Deleting of {} file objects", files.size()); Invoker.once("Remove S3 Files", false )); // now the dirs - List<DeleteObjectsRequest.KeyVersion> dirs = keyList.stream() + List<ObjectIdentifier> dirs = keyList.stream() .filter(e -> e.isDirMarker) - .map(e -> e.keyVersion) + .map(e -> e.objectIdentifier) .collect(Collectors.toList()); LOG.debug("Deleting of {} directory markers", dirs.size()); // This is invoked with deleteFakeDir. @@ -422,17 +423,17 @@ private void asyncDeleteAction( * to choose which statistics to update. 
*/ private static final class DeleteEntry { - private final DeleteObjectsRequest.KeyVersion keyVersion; + private final ObjectIdentifier objectIdentifier; private final boolean isDirMarker; private DeleteEntry(final String key, final boolean isDirMarker) { - this.keyVersion = new DeleteObjectsRequest.KeyVersion(key); + this.objectIdentifier = ObjectIdentifier.builder().key(key).build(); this.isDirMarker = isDirMarker; } public String getKey() { - return keyVersion.getKey(); + return objectIdentifier.key(); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java index f7e06413a3761..54a91323bc2e2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java @@ -18,9 +18,9 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.AmazonServiceException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; /** * Translate from AWS SDK-wrapped exceptions into IOExceptions with @@ -49,9 +49,9 @@ private ErrorTranslation() { * @return true if the status code and error code mean that the * remote bucket is unknown. */ - public static boolean isUnknownBucket(AmazonServiceException e) { - return e.getStatusCode() == SC_404 - && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode()); + public static boolean isUnknownBucket(AwsServiceException e) { + return e.statusCode() == SC_404_NOT_FOUND + && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.awsErrorDetails().errorCode()); } /** @@ -62,8 +62,8 @@ public static boolean isUnknownBucket(AmazonServiceException e) { * @return true if the status code and error code mean that the * HEAD request returned 404 but the bucket was there. 
*/ - public static boolean isObjectNotFound(AmazonServiceException e) { - return e.getStatusCode() == SC_404 && !isUnknownBucket(e); + public static boolean isObjectNotFound(AwsServiceException e) { + return e.statusCode() == SC_404_NOT_FOUND && !isUnknownBucket(e); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java index f75066e049d3e..275ad40c08df1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java @@ -23,15 +23,17 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.TreeMap; import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; @@ -275,7 +277,7 @@ private Map retrieveHeaders( final Statistic statistic) throws IOException { StoreContext context = getStoreContext(); String objectKey = context.pathToKey(path); - ObjectMetadata md; + HeadObjectResponse md; String symbol = statistic.getSymbol(); S3AStatisticsContext instrumentation = context.getInstrumentation(); try { @@ -287,59 +289,67 @@ private Map retrieveHeaders( callbacks.getObjectMetadata(objectKey + "/")); } // all user metadata - Map rawHeaders = md.getUserMetadata(); + Map rawHeaders = md.metadata(); Map headers = new TreeMap<>(); rawHeaders.forEach((key, value) -> headers.put(XA_HEADER_PREFIX + key, encodeBytes(value))); // and add the usual content length &c, if set maybeSetHeader(headers, XA_CACHE_CONTROL, - md.getCacheControl()); + md.cacheControl()); maybeSetHeader(headers, XA_CONTENT_DISPOSITION, - md.getContentDisposition()); + md.contentDisposition()); maybeSetHeader(headers, XA_CONTENT_ENCODING, - md.getContentEncoding()); + md.contentEncoding()); maybeSetHeader(headers, XA_CONTENT_LANGUAGE, - md.getContentLanguage()); + md.contentLanguage()); // If CSE is enabled, use the unencrypted content length. - if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null - && md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) { - maybeSetHeader(headers, XA_CONTENT_LENGTH, - md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH)); - } else { - maybeSetHeader(headers, XA_CONTENT_LENGTH, - md.getContentLength()); + // TODO: CSE is not supported yet, add these headers in during CSE work. +// if (md.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null +// && md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH) != null) { +// maybeSetHeader(headers, XA_CONTENT_LENGTH, +// md.getUserMetaDataOf(Headers.UNENCRYPTED_CONTENT_LENGTH)); +// } else { +// maybeSetHeader(headers, XA_CONTENT_LENGTH, +// md.contentLength()); +// } +// maybeSetHeader(headers, XA_CONTENT_MD5, +// md.getContentMD5()); + // TODO: Add back in else block during CSE work. 
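Background for reviewers, not part of the change itself: the lines that follow read Content-Range out of the raw HTTP response because HeadObjectResponse exposes no typed accessor for it. A standalone sketch of that pattern (the map keys are illustrative):

```java
import java.util.Map;
import java.util.TreeMap;

import software.amazon.awssdk.services.s3.model.HeadObjectResponse;

public final class RawHeaderSketch {
  // v2 responses offer typed accessors such as eTag() and contentLength();
  // anything else has to be pulled from the underlying sdkHttpResponse().
  public static Map<String, String> summarize(HeadObjectResponse md) {
    Map<String, String> out = new TreeMap<>();
    out.put("etag", md.eTag());
    out.put("content-length", String.valueOf(md.contentLength()));
    md.sdkHttpResponse()
        .firstMatchingHeader("Content-Range")
        .ifPresent(v -> out.put("content-range", v));
    return out;
  }
}
```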
+ maybeSetHeader(headers, XA_CONTENT_LENGTH, + md.contentLength()); + if (md.sdkHttpResponse() != null && md.sdkHttpResponse().headers() != null + && md.sdkHttpResponse().headers().get("Content-Range") != null) { + maybeSetHeader(headers, XA_CONTENT_RANGE, + md.sdkHttpResponse().headers().get("Content-Range").get(0)); } - maybeSetHeader(headers, XA_CONTENT_MD5, - md.getContentMD5()); - maybeSetHeader(headers, XA_CONTENT_RANGE, - md.getContentRange()); maybeSetHeader(headers, XA_CONTENT_TYPE, - md.getContentType()); + md.contentType()); maybeSetHeader(headers, XA_ETAG, - md.getETag()); + md.eTag()); maybeSetHeader(headers, XA_LAST_MODIFIED, - md.getLastModified()); + Date.from(md.lastModified())); // AWS custom headers maybeSetHeader(headers, XA_ARCHIVE_STATUS, - md.getArchiveStatus()); + md.archiveStatus()); maybeSetHeader(headers, XA_OBJECT_LOCK_LEGAL_HOLD_STATUS, - md.getObjectLockLegalHoldStatus()); + md.objectLockLegalHoldStatus()); maybeSetHeader(headers, XA_OBJECT_LOCK_MODE, - md.getObjectLockMode()); + md.objectLockMode()); maybeSetHeader(headers, XA_OBJECT_LOCK_RETAIN_UNTIL_DATE, - md.getObjectLockRetainUntilDate()); + md.objectLockRetainUntilDate()); maybeSetHeader(headers, XA_OBJECT_REPLICATION_STATUS, - md.getReplicationStatus()); + md.replicationStatus()); maybeSetHeader(headers, XA_S3_VERSION_ID, - md.getVersionId()); + md.versionId()); maybeSetHeader(headers, XA_SERVER_SIDE_ENCRYPTION, - md.getSSEAlgorithm()); + md.serverSideEncryptionAsString()); maybeSetHeader(headers, XA_STORAGE_CLASS, - md.getStorageClass()); - maybeSetHeader(headers, XA_STORAGE_CLASS, - md.getReplicationStatus()); + md.storageClassAsString()); + // TODO: check this, looks wrong. + // maybeSetHeader(headers, XA_STORAGE_CLASS, +// md.getReplicationStatus()); return headers; } @@ -458,70 +468,51 @@ public static Optional extractXAttrLongValue(byte[] data) { } /** - * Creates a copy of the passed {@link ObjectMetadata}. - * Does so without using the {@link ObjectMetadata#clone()} method, - * to avoid copying unnecessary headers. + * Creates a copy of the passed metadata. * This operation does not copy the {@code X_HEADER_MAGIC_MARKER} * header to avoid confusion. If a marker file is renamed, * it loses information about any remapped file. * If new fields are added to ObjectMetadata which are not * present in the user metadata headers, they will not be picked * up or cloned unless this operation is updated. - * @param source the {@link ObjectMetadata} to copy + * @param source the source metadata to copy * @param dest the metadata to update; this is the return value. 
+ * @param copyObjectRequestBuilder CopyObjectRequest builder */ - public static void cloneObjectMetadata(ObjectMetadata source, - ObjectMetadata dest) { + public static void cloneObjectMetadata(HeadObjectResponse source, + Map dest, CopyObjectRequest.Builder copyObjectRequestBuilder) { // Possibly null attributes // Allowing nulls to pass breaks it during later use - if (source.getCacheControl() != null) { - dest.setCacheControl(source.getCacheControl()); - } - if (source.getContentDisposition() != null) { - dest.setContentDisposition(source.getContentDisposition()); - } - if (source.getContentEncoding() != null) { - dest.setContentEncoding(source.getContentEncoding()); - } - if (source.getContentMD5() != null) { - dest.setContentMD5(source.getContentMD5()); - } - if (source.getContentType() != null) { - dest.setContentType(source.getContentType()); - } - if (source.getExpirationTime() != null) { - dest.setExpirationTime(source.getExpirationTime()); + if (source.cacheControl() != null) { + copyObjectRequestBuilder.cacheControl(source.cacheControl()); } - if (source.getExpirationTimeRuleId() != null) { - dest.setExpirationTimeRuleId(source.getExpirationTimeRuleId()); + if (source.contentDisposition() != null) { + copyObjectRequestBuilder.contentDisposition(source.contentDisposition()); } - if (source.getHttpExpiresDate() != null) { - dest.setHttpExpiresDate(source.getHttpExpiresDate()); + if (source.contentEncoding() != null) { + copyObjectRequestBuilder.contentEncoding(source.contentEncoding()); } - if (source.getLastModified() != null) { - dest.setLastModified(source.getLastModified()); - } - if (source.getOngoingRestore() != null) { - dest.setOngoingRestore(source.getOngoingRestore()); - } - if (source.getRestoreExpirationTime() != null) { - dest.setRestoreExpirationTime(source.getRestoreExpirationTime()); + + if (source.contentType() != null) { + copyObjectRequestBuilder.contentType(source.contentType()); } - if (source.getSSEAlgorithm() != null) { - dest.setSSEAlgorithm(source.getSSEAlgorithm()); + + if (source.serverSideEncryption() != null) { + copyObjectRequestBuilder.serverSideEncryption(source.serverSideEncryption()); } - if (source.getSSECustomerAlgorithm() != null) { - dest.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm()); + + if (source.sseCustomerAlgorithm() != null) { + copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(source.sseCustomerAlgorithm()); } - if (source.getSSECustomerKeyMd5() != null) { - dest.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5()); + if (source.sseCustomerKeyMD5() != null) { + copyObjectRequestBuilder.copySourceSSECustomerKeyMD5(source.sseCustomerKeyMD5()); } // copy user metadata except the magic marker header. - source.getUserMetadata().entrySet().stream() + source.metadata().entrySet().stream() .filter(e -> !e.getKey().equals(X_HEADER_MAGIC_MARKER)) - .forEach(e -> dest.addUserMetadata(e.getKey(), e.getValue())); + .forEach(e -> dest.put(e.getKey(), e.getValue())); } public interface HeaderProcessingCallbacks { @@ -534,6 +525,6 @@ public interface HeaderProcessingCallbacks { * @throws IOException IO and object access problems. 
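Aside: the rewritten cloneObjectMetadata now feeds a CopyObjectRequest builder plus a user-metadata map rather than a second ObjectMetadata. A rough usage sketch, assuming headResponse was obtained from an earlier headObject() call (the other types are real SDK v2 model classes):

    CopyObjectRequest.Builder builder = CopyObjectRequest.builder()
        .sourceBucket("bucket").sourceKey("src")
        .destinationBucket("bucket").destinationKey("dst");
    Map<String, String> userMetadata = new HashMap<>();
    // copies headers onto the builder, user metadata into the map
    HeaderProcessing.cloneObjectMetadata(headResponse, userMetadata, builder);
    CopyObjectRequest request = builder
        .metadata(userMetadata)
        .metadataDirective(MetadataDirective.REPLACE)
        .build();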
*/ @Retries.RetryTranslated - ObjectMetadata getObjectMetadata(String key) throws IOException; + HeadObjectResponse getObjectMetadata(String key) throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 2c34e7b9b6ec2..14e1fdc8cb5eb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -110,11 +110,50 @@ private InternalConstants() { S3A_OPENFILE_KEYS = Collections.unmodifiableSet(keys); } - /** 403 error code. */ - public static final int SC_403 = 403; + /** 200 status code: OK. */ + public static final int SC_200_OK = 200; - /** 404 error code. */ - public static final int SC_404 = 404; + /** 301 status code: Moved Permanently. */ + public static final int SC_301_MOVED_PERMANENTLY = 301; + + /** 307 status code: Temporary Redirect. */ + public static final int SC_307_TEMPORARY_REDIRECT = 307; + + /** 400 status code: Bad Request. */ + public static final int SC_400_BAD_REQUEST = 400; + + /** 401 status code: Unauthorized. */ + public static final int SC_401_UNAUTHORIZED = 401; + + /** 403 status code: Forbidden. */ + public static final int SC_403_FORBIDDEN = 403; + + /** 404 status code: Not Found. */ + public static final int SC_404_NOT_FOUND = 404; + + /** 405 status code: Method Not Allowed. */ + public static final int SC_405_METHOD_NOT_ALLOWED = 405; + + /** 410 status code: Gone. */ + public static final int SC_410_GONE = 410; + + /** 412 status code: Precondition Failed. */ + public static final int SC_412_PRECONDITION_FAILED = 412; + + /** 416 status code: Range Not Satisfiable. */ + public static final int SC_416_RANGE_NOT_SATISFIABLE = 416; + + /** 443 status code: No Response (unofficial). */ + public static final int SC_443_NO_RESPONSE = 443; + + /** 444 status code: No Response (unofficial). */ + public static final int SC_444_NO_RESPONSE = 444; + + /** 500 status code: Internal Server Error. */ + public static final int SC_500_INTERNAL_SERVER_ERROR = 500; + + /** 503 status code: Service Unavailable. */ + public static final int SC_503_SERVICE_UNAVAILABLE = 503; /** Name of the log for throttling events. Value: {@value}. 
*/ public static final String THROTTLE_LOG_NAME = diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java index 5d17ae91b81e7..70c6165c635cb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java @@ -22,15 +22,15 @@ import java.io.InterruptedIOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; @@ -127,7 +127,7 @@ RemoteIterator listFilesAndDirectoryMarkers( * @throws IOException Other IO problems */ @Retries.RetryTranslated - CopyResult copyFile(String srcKey, + CopyObjectResponse copyFile(String srcKey, String destKey, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) @@ -142,14 +142,14 @@ CopyResult copyFile(String srcKey, * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. 
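Aside: the removeKeys() signature that follows now takes SDK v2 ObjectIdentifier entries instead of DeleteObjectsRequest.KeyVersion. A sketch of the new calling convention, assuming a callbacks instance is in scope:

    List<ObjectIdentifier> keys = new ArrayList<>();
    keys.add(ObjectIdentifier.builder().key("dir/file1").build());
    keys.add(ObjectIdentifier.builder().key("dir/file2").build());
    // second argument false: this is not a fake-directory cleanup
    callbacks.removeKeys(keys, false);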
*/ @Retries.RetryRaw void removeKeys( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java index ae4d2fe7a3431..62ed2ba614514 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java @@ -25,9 +25,6 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonClientException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +41,9 @@ import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.OperationDuration; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; @@ -122,7 +122,7 @@ public class RenameOperation extends ExecutingStoreOperation { /** * list of keys to delete on the next (bulk) delete call. */ - private final List keysToDelete = + private final List keysToDelete = new ArrayList<>(); /** @@ -199,7 +199,7 @@ private void completeActiveCopies(String reason) throws IOException { */ private void queueToDelete(Path path, String key) { LOG.debug("Queueing to delete {}", path); - keysToDelete.add(new DeleteObjectsRequest.KeyVersion(key)); + keysToDelete.add(ObjectIdentifier.builder().key(key).build()); } /** @@ -268,7 +268,7 @@ public Long execute() throws IOException { } else { recursiveDirectoryRename(); } - } catch (AmazonClientException | IOException ex) { + } catch (SdkException | IOException ex) { // rename failed. // block for all ongoing copies to complete, successfully or not try { @@ -572,7 +572,7 @@ private Path copySource( */ @Retries.RetryTranslated private void removeSourceObjects( - final List keys) + final List keys) throws IOException { // remove the keys @@ -580,9 +580,9 @@ private void removeSourceObjects( // who is trying to debug why objects are no longer there. if (LOG.isDebugEnabled()) { LOG.debug("Initiating delete operation for {} objects", keys.size()); - for (DeleteObjectsRequest.KeyVersion key : keys) { - LOG.debug(" {} {}", key.getKey(), - key.getVersion() != null ? key.getVersion() : ""); + for (ObjectIdentifier objectIdentifier : keys) { + LOG.debug(" {} {}", objectIdentifier.key(), + objectIdentifier.versionId() != null ? 
objectIdentifier.versionId() : ""); } } @@ -619,10 +619,10 @@ private String maybeAddTrailingSlash(String key) { protected IOException convertToIOException(final Exception ex) { if (ex instanceof IOException) { return (IOException) ex; - } else if (ex instanceof SdkBaseException) { + } else if (ex instanceof SdkException) { return translateException("rename " + sourcePath + " to " + destPath, sourcePath.toString(), - (SdkBaseException) ex); + (SdkException) ex); } else { // should never happen, but for completeness return new IOException(ex); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 7227941e34438..1e01253bbf2b4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -18,42 +18,41 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; +import java.util.Base64; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import javax.annotation.Nullable; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import 
software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.MetadataDirective; +import software.amazon.awssdk.services.s3.model.ObjectCannedACL; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.ServerSideEncryption; +import software.amazon.awssdk.services.s3.model.StorageClass; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.utils.Md5Utils; + import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; @@ -79,8 +78,8 @@ * This is where audit span information is added to the requests, * until it is done in the AWS SDK itself. * - * All created requests will be passed through - * {@link PrepareRequest#prepareRequest(AmazonWebServiceRequest)} before + * All created request builders will be passed to + * {@link PrepareRequest#prepareRequest(SdkRequest.Builder)} before * being returned to the caller. */ public class RequestFactoryImpl implements RequestFactory { @@ -101,7 +100,7 @@ public class RequestFactoryImpl implements RequestFactory { /** * ACL For new objects. */ - private final CannedAccessControlList cannedACL; + private final ObjectCannedACL cannedACL; /** * Max number of multipart entries allowed in a large @@ -147,14 +146,15 @@ protected RequestFactoryImpl( /** * Preflight preparation of AWS request. - * @param web service request - * @return prepared entry. + * @param web service request builder + * @return prepared builder. */ @Retries.OnceRaw - private T prepareRequest(T t) { - return requestPreparer != null - ? requestPreparer.prepareRequest(t) - : t; + private T prepareRequest(T t) { + if (requestPreparer != null) { + requestPreparer.prepareRequest(t); + } + return t; } /** @@ -162,7 +162,7 @@ private T prepareRequest(T t) { * @return an ACL, if any */ @Override - public CannedAccessControlList getCannedACL() { + public ObjectCannedACL getCannedACL() { return cannedACL; } @@ -174,29 +174,6 @@ protected String getBucket() { return bucket; } - /** - * Create the AWS SDK structure used to configure SSE, - * if the encryption secrets contain the information/settings for this. - * @return an optional set of KMS Key settings - */ - @Override - public Optional generateSSEAwsKeyParams() { - return EncryptionSecretOperations.createSSEAwsKeyManagementParams( - encryptionSecrets); - } - - /** - * Create the SSE-C structure for the AWS SDK, if the encryption secrets - * contain the information/settings for this. - * This will contain a secret extracted from the bucket/configuration. - * @return an optional customer key. - */ - @Override - public Optional generateSSECustomerKey() { - return EncryptionSecretOperations.createSSECustomerKey( - encryptionSecrets); - } - /** * Get the encryption algorithm of this endpoint. * @return the encryption algorithm. @@ -229,307 +206,301 @@ public StorageClass getStorageClass() { * request when encryption is enabled. 
* @param request upload part request */ - protected void setOptionalUploadPartRequestParameters( - UploadPartRequest request) { - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); + protected void uploadPartEncryptionParameters( + UploadPartRequest.Builder builder) { + // TODO: review/refactor together with similar methods for other requests. + // need to set key to get objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } - /** - * Sets server side encryption parameters to the GET reuquest. - * request when encryption is enabled. - * @param request upload part request - */ - protected void setOptionalGetObjectMetadataParameters( - GetObjectMetadataRequest request) { - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } + private CopyObjectRequest.Builder buildCopyObjectRequest() { - /** - * Set the optional parameters when initiating the request (encryption, - * headers, storage, etc). - * @param request request to patch. - */ - protected void setOptionalMultipartUploadRequestParameters( - InitiateMultipartUploadRequest request) { - generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } - - /** - * Set the optional parameters for a PUT request. - * @param request request to patch. - */ - protected void setOptionalPutRequestParameters(PutObjectRequest request) { - generateSSEAwsKeyParams().ifPresent(request::setSSEAwsKeyManagementParams); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - } + CopyObjectRequest.Builder copyObjectRequestBuilder = CopyObjectRequest.builder(); - /** - * Set the optional metadata for an object being created or copied. - * @param metadata to update. - * @param isDirectoryMarker is this for a directory marker? - */ - protected void setOptionalObjectMetadata(ObjectMetadata metadata, - boolean isDirectoryMarker) { - final S3AEncryptionMethods algorithm - = getServerSideEncryptionAlgorithm(); - if (S3AEncryptionMethods.SSE_S3 == algorithm) { - metadata.setSSEAlgorithm(algorithm.getMethod()); - } - if (contentEncoding != null && !isDirectoryMarker) { - metadata.setContentEncoding(contentEncoding); + if (contentEncoding != null) { + copyObjectRequestBuilder.contentEncoding(contentEncoding); } + + return copyObjectRequestBuilder; } - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @return a new metadata instance - */ @Override - public ObjectMetadata newObjectMetadata(long length) { - return createObjectMetadata(length, false); - } + public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, + String dstKey, + HeadObjectResponse srcom) { - /** - * Create a new object metadata instance. - * Any standard metadata headers are added here, for example: - * encryption. - * - * @param length length of data to set in header; Ignored if negative - * @param isDirectoryMarker is this for a directory marker? 
- * @return a new metadata instance - */ - private ObjectMetadata createObjectMetadata(long length, boolean isDirectoryMarker) { - final ObjectMetadata om = new ObjectMetadata(); - setOptionalObjectMetadata(om, isDirectoryMarker); - if (length >= 0) { - om.setContentLength(length); + CopyObjectRequest.Builder copyObjectRequestBuilder = buildCopyObjectRequest(); + + Map dstom = new HashMap<>(); + HeaderProcessing.cloneObjectMetadata(srcom, dstom, copyObjectRequestBuilder); + copyEncryptionParameters(copyObjectRequestBuilder); + + copyObjectRequestBuilder + .metadata(dstom) + .metadataDirective(MetadataDirective.REPLACE) + .acl(cannedACL); + + if (srcom.storageClass() != null) { + copyObjectRequestBuilder.storageClass(srcom.storageClass()); } - return om; - } - @Override - public CopyObjectRequest newCopyObjectRequest(String srcKey, - String dstKey, - ObjectMetadata srcom) { - CopyObjectRequest copyObjectRequest = - new CopyObjectRequest(getBucket(), srcKey, getBucket(), dstKey); - ObjectMetadata dstom = newObjectMetadata(srcom.getContentLength()); - HeaderProcessing.cloneObjectMetadata(srcom, dstom); - setOptionalObjectMetadata(dstom, false); - copyEncryptionParameters(srcom, copyObjectRequest); - copyObjectRequest.setCannedAccessControlList(cannedACL); - copyObjectRequest.setNewObjectMetadata(dstom); - Optional.ofNullable(srcom.getStorageClass()) - .ifPresent(copyObjectRequest::setStorageClass); - return prepareRequest(copyObjectRequest); + copyObjectRequestBuilder.destinationBucket(getBucket()) + .destinationKey(dstKey).sourceBucket(getBucket()).sourceKey(srcKey); + + return prepareRequest(copyObjectRequestBuilder); } /** * Propagate encryption parameters from source file if set else use the * current filesystem encryption settings. - * @param srcom source object metadata. - * @param copyObjectRequest copy object request body. + * @param copyObjectRequestBuilder copy object request builder. */ - protected void copyEncryptionParameters( - ObjectMetadata srcom, - CopyObjectRequest copyObjectRequest) { - String sourceKMSId = srcom.getSSEAwsKmsKeyId(); - if (isNotEmpty(sourceKMSId)) { - // source KMS ID is propagated - LOG.debug("Propagating SSE-KMS settings from source {}", - sourceKMSId); - copyObjectRequest.setSSEAwsKeyManagementParams( - new SSEAwsKeyManagementParams(sourceKMSId)); - } - switch (getServerSideEncryptionAlgorithm()) { - case SSE_S3: - /* no-op; this is set in destination object metadata */ - break; - - case SSE_C: - generateSSECustomerKey().ifPresent(customerKey -> { - copyObjectRequest.setSourceSSECustomerKey(customerKey); - copyObjectRequest.setDestinationSSECustomerKey(customerKey); - }); - break; + protected void copyEncryptionParameters(CopyObjectRequest.Builder copyObjectRequestBuilder) { - case SSE_KMS: - generateSSEAwsKeyParams().ifPresent( - copyObjectRequest::setSSEAwsKeyManagementParams); - break; - default: + final S3AEncryptionMethods algorithm + = getServerSideEncryptionAlgorithm(); + + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. 
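Aside: the SSE-C branch a few lines below sets the same algorithm/key/MD5 triplet that recurs in the put, multipart and upload-part builders in this file. In isolation the pattern is roughly the following; the key here is a fake base64 value, while ServerSideEncryption and Md5Utils are real SDK v2 classes:

    String base64Key = "MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY=";  // fake 256-bit key
    builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
        .sseCustomerKey(base64Key)
        .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64Key)));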
+ EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.name()) + .copySourceSSECustomerKey(base64customerKey).copySourceSSECustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))) + .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } } /** * Create a putObject request. * Adds the ACL, storage class and metadata * @param key key of object - * @param metadata metadata header * @param options options for the request, including headers - * @param srcfile source file - * @return the request + * @param length length of object to be uploaded + * @param isDirectoryMarker true if object to be uploaded is a directory marker + * @return the request builder */ @Override - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, + public PutObjectRequest.Builder newPutObjectRequestBuilder(String key, final PutObjectOptions options, - File srcfile) { - Preconditions.checkNotNull(srcfile); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - srcfile); - maybeSetMetadata(options, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); + long length, + boolean isDirectoryMarker) { + + Preconditions.checkArgument(isNotEmpty(key), "Null/empty key"); + + PutObjectRequest.Builder putObjectRequestBuilder = + buildPutObjectRequest(length, isDirectoryMarker); + putObjectRequestBuilder.bucket(getBucket()).key(key); + + if (options != null) { + putObjectRequestBuilder.metadata(options.getHeaders()); + } + + putEncryptionParameters(putObjectRequestBuilder); + if (storageClass != null) { - putObjectRequest.setStorageClass(storageClass); + putObjectRequestBuilder.storageClass(storageClass); } - putObjectRequest.setMetadata(metadata); - return prepareRequest(putObjectRequest); + + return prepareRequest(putObjectRequestBuilder); } - /** - * Create a {@link PutObjectRequest} request. - * The metadata is assumed to have been configured with the size of the - * operation. - * @param key key of object - * @param metadata metadata header - * @param options options for the request - * @param inputStream source data. 
- * @return the request - */ - @Override - public PutObjectRequest newPutObjectRequest(String key, - ObjectMetadata metadata, - @Nullable final PutObjectOptions options, - InputStream inputStream) { - Preconditions.checkNotNull(inputStream); - Preconditions.checkArgument(isNotEmpty(key), "Null/empty key"); - maybeSetMetadata(options, metadata); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - inputStream, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); - if (storageClass != null) { - putObjectRequest.setStorageClass(storageClass); + private PutObjectRequest.Builder buildPutObjectRequest(long length, boolean isDirectoryMarker) { + + PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder(); + + putObjectRequestBuilder.acl(cannedACL); + + if (length >= 0) { + putObjectRequestBuilder.contentLength(length); + } + + if (contentEncoding != null && !isDirectoryMarker) { + putObjectRequestBuilder.contentEncoding(contentEncoding); + } + + return putObjectRequestBuilder; + } + + private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBuilder) { + final S3AEncryptionMethods algorithm + = getServerSideEncryptionAlgorithm(); + + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + putObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. + EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> putObjectRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + putObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } - return prepareRequest(putObjectRequest); } @Override - public PutObjectRequest newDirectoryMarkerRequest(String directory) { + public PutObjectRequest.Builder newDirectoryMarkerRequest(String directory) { String key = directory.endsWith("/") ? 
directory : (directory + "/"); - // an input stream which is always empty - final InputStream inputStream = new InputStream() { - @Override - public int read() throws IOException { - return -1; - } - }; + // preparation happens in here - final ObjectMetadata metadata = createObjectMetadata(0L, true); - metadata.setContentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY); + PutObjectRequest.Builder putObjectRequestBuilder = buildPutObjectRequest(0L, true); + + putObjectRequestBuilder.bucket(getBucket()).key(key) + .contentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY); - PutObjectRequest putObjectRequest = new PutObjectRequest(getBucket(), key, - inputStream, metadata); - setOptionalPutRequestParameters(putObjectRequest); - putObjectRequest.setCannedAcl(cannedACL); - return prepareRequest(putObjectRequest); + putEncryptionParameters(putObjectRequestBuilder); + + return prepareRequest(putObjectRequestBuilder); } @Override - public ListMultipartUploadsRequest - newListMultipartUploadsRequest(String prefix) { - ListMultipartUploadsRequest request = new ListMultipartUploadsRequest( - getBucket()); + public ListMultipartUploadsRequest.Builder + newListMultipartUploadsRequestBuilder(String prefix) { + + ListMultipartUploadsRequest.Builder requestBuilder = ListMultipartUploadsRequest.builder(); + + requestBuilder.bucket(getBucket()); if (prefix != null) { - request.setPrefix(prefix); + requestBuilder.prefix(prefix); } - return prepareRequest(request); + return prepareRequest(requestBuilder); } @Override - public AbortMultipartUploadRequest newAbortMultipartUploadRequest( + public AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder( String destKey, String uploadId) { - return prepareRequest(new AbortMultipartUploadRequest(getBucket(), - destKey, - uploadId)); + AbortMultipartUploadRequest.Builder requestBuilder = + AbortMultipartUploadRequest.builder().bucket(getBucket()).key(destKey).uploadId(uploadId); + + return prepareRequest(requestBuilder); + } + + private void multipartUploadEncryptionParameters(CreateMultipartUploadRequest.Builder mpuRequestBuilder) { + final S3AEncryptionMethods algorithm + = getServerSideEncryptionAlgorithm(); + + if (S3AEncryptionMethods.SSE_S3 == algorithm) { + mpuRequestBuilder.serverSideEncryption(algorithm.getMethod()); + } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { + mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS); + // Set the KMS key if present, else S3 uses AWS managed key. 
+ EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) + .ifPresent(kmsKey -> mpuRequestBuilder.ssekmsKeyId(kmsKey)); + } else if (S3AEncryptionMethods.SSE_C == algorithm) { + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + mpuRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + } } @Override - public InitiateMultipartUploadRequest newMultipartUploadRequest( + public CreateMultipartUploadRequest.Builder newMultipartUploadRequestBuilder( final String destKey, @Nullable final PutObjectOptions options) throws PathIOException { if (!isMultipartUploadEnabled) { throw new PathIOException(destKey, "Multipart uploads are disabled."); } - final ObjectMetadata objectMetadata = newObjectMetadata(-1); - maybeSetMetadata(options, objectMetadata); - final InitiateMultipartUploadRequest initiateMPURequest = - new InitiateMultipartUploadRequest(getBucket(), - destKey, - objectMetadata); - initiateMPURequest.setCannedACL(getCannedACL()); - if (getStorageClass() != null) { - initiateMPURequest.withStorageClass(getStorageClass()); + + CreateMultipartUploadRequest.Builder requestBuilder = CreateMultipartUploadRequest.builder(); + + if (contentEncoding != null) { + requestBuilder.contentEncoding(contentEncoding); + } + + if (options != null) { + requestBuilder.metadata(options.getHeaders()); + } + + requestBuilder.bucket(getBucket()).key(destKey).acl(cannedACL); + + multipartUploadEncryptionParameters(requestBuilder); + + if (storageClass != null) { + requestBuilder.storageClass(storageClass); } - setOptionalMultipartUploadRequestParameters(initiateMPURequest); - return prepareRequest(initiateMPURequest); + + return prepareRequest(requestBuilder); } @Override - public CompleteMultipartUploadRequest newCompleteMultipartUploadRequest( + public CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder( String destKey, String uploadId, - List partETags) { + List partETags) { // a copy of the list is required, so that the AWS SDK doesn't // attempt to sort an unmodifiable list. 
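Aside: in v2 the PartETag list becomes CompletedPart entries wrapped in a CompletedMultipartUpload, as the rewritten method below shows. A self-contained sketch with invented bucket/key/etag values (all types are real SDK v2 model classes):

    List<CompletedPart> parts = Arrays.asList(
        CompletedPart.builder().partNumber(1).eTag("etag-1").build(),
        CompletedPart.builder().partNumber(2).eTag("etag-2").build());
    CompleteMultipartUploadRequest request = CompleteMultipartUploadRequest.builder()
        .bucket("bucket").key("key").uploadId("upload-id")
        .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build())
        .build();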
- return prepareRequest(new CompleteMultipartUploadRequest(bucket, - destKey, uploadId, new ArrayList<>(partETags))); + CompleteMultipartUploadRequest.Builder requestBuilder = + CompleteMultipartUploadRequest.builder().bucket(bucket).key(destKey).uploadId(uploadId) + .multipartUpload(CompletedMultipartUpload.builder().parts(partETags).build()); + return prepareRequest(requestBuilder); } @Override - public GetObjectMetadataRequest newGetObjectMetadataRequest(String key) { - GetObjectMetadataRequest request = - new GetObjectMetadataRequest(getBucket(), key); - //SSE-C requires to be filled in if enabled for object metadata - setOptionalGetObjectMetadataParameters(request); - return prepareRequest(request); + public HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key) { + + HeadObjectRequest.Builder headObjectRequestBuilder = + HeadObjectRequest.builder().bucket(getBucket()).key(key); + + // need to set key to get metadata for objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + headObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); + + return prepareRequest(headObjectRequestBuilder); } @Override - public GetObjectRequest newGetObjectRequest(String key) { - GetObjectRequest request = new GetObjectRequest(bucket, key); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); + public GetObjectRequest.Builder newGetObjectRequestBuilder(String key) { + GetObjectRequest.Builder builder = GetObjectRequest.builder() + .bucket(bucket) + .key(key); + + // need to set key to get objects encrypted with SSE_C + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); - return prepareRequest(request); + return prepareRequest(builder); } @Override - public UploadPartRequest newUploadPartRequest( + public UploadPartRequest.Builder newUploadPartRequestBuilder( String destKey, String uploadId, int partNumber, - long size, - InputStream uploadStream, - File sourceFile, - long offset) throws PathIOException { + long size) throws PathIOException { checkNotNull(uploadId); - // exactly one source must be set; xor verifies this - checkArgument((uploadStream != null) ^ (sourceFile != null), - "Data source"); checkArgument(size >= 0, "Invalid partition size %s", size); checkArgument(partNumber > 0, "partNumber must be between 1 and %s inclusive, but is %s", - DEFAULT_UPLOAD_PART_COUNT_LIMIT, partNumber); + multipartPartCountLimit, partNumber); LOG.debug("Creating part upload request for {} #{} size {}", uploadId, partNumber, size); @@ -539,88 +510,81 @@ public UploadPartRequest newUploadPartRequest( throw new PathIOException(destKey, String.format(pathErrorMsg, partNumber, multipartPartCountLimit)); } - UploadPartRequest request = new UploadPartRequest() - .withBucketName(getBucket()) - .withKey(destKey) - .withUploadId(uploadId) - .withPartNumber(partNumber) - .withPartSize(size); - if (uploadStream != null) { - // there's an upload stream. Bind to it. 
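Aside: the v1 UploadPartRequest bound a stream or file directly, as the removed lines here show; in v2 the request carries only metadata and the payload travels separately as a RequestBody. A sketch of the new convention, with s3Client and sourceFile assumed to be in scope:

    UploadPartRequest request = UploadPartRequest.builder()
        .bucket("bucket").key("key")
        .uploadId("upload-id")
        .partNumber(1)
        .contentLength(sourceFile.length())
        .build();
    UploadPartResponse response =
        s3Client.uploadPart(request, RequestBody.fromFile(sourceFile));
    String eTag = response.eTag();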
- request.setInputStream(uploadStream); - } else { - checkArgument(sourceFile.exists(), - "Source file does not exist: %s", sourceFile); - checkArgument(sourceFile.isFile(), - "Source is not a file: %s", sourceFile); - checkArgument(offset >= 0, "Invalid offset %s", offset); - long length = sourceFile.length(); - checkArgument(offset == 0 || offset < length, - "Offset %s beyond length of file %s", offset, length); - request.setFile(sourceFile); - request.setFileOffset(offset); - } - setOptionalUploadPartRequestParameters(request); - return prepareRequest(request); + UploadPartRequest.Builder builder = UploadPartRequest.builder() + .bucket(getBucket()) + .key(destKey) + .uploadId(uploadId) + .partNumber(partNumber) + .contentLength(size); + uploadPartEncryptionParameters(builder); + return prepareRequest(builder); } @Override - public SelectObjectContentRequest newSelectRequest(String key) { - SelectObjectContentRequest request = new SelectObjectContentRequest(); - request.setBucketName(bucket); - request.setKey(key); - generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - return prepareRequest(request); + public SelectObjectContentRequest.Builder newSelectRequestBuilder(String key) { + SelectObjectContentRequest.Builder requestBuilder = + SelectObjectContentRequest.builder() + .bucket(bucket) + .key(key); + + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + requestBuilder + .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64( + Base64.getDecoder().decode(base64customerKey))); + }); + + return prepareRequest(requestBuilder); } @Override - public ListObjectsRequest newListObjectsV1Request( + public ListObjectsRequest.Builder newListObjectsV1RequestBuilder( final String key, final String delimiter, final int maxKeys) { - ListObjectsRequest request = new ListObjectsRequest() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + ListObjectsRequest.Builder requestBuilder = + ListObjectsRequest.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); - } - @Override - public ListNextBatchOfObjectsRequest newListNextBatchOfObjectsRequest( - ObjectListing prev) { - return prepareRequest(new ListNextBatchOfObjectsRequest(prev)); + return prepareRequest(requestBuilder); } @Override - public ListObjectsV2Request newListObjectsV2Request( + public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder( final String key, final String delimiter, final int maxKeys) { - final ListObjectsV2Request request = new ListObjectsV2Request() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + final ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder() + .bucket(bucket) + .maxKeys(maxKeys) + .prefix(key); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); + + return prepareRequest(requestBuilder); } @Override - public DeleteObjectRequest newDeleteObjectRequest(String key) { - return prepareRequest(new DeleteObjectRequest(bucket, key)); + public DeleteObjectRequest.Builder newDeleteObjectRequestBuilder(String key) { + return prepareRequest(DeleteObjectRequest.builder().bucket(bucket).key(key)); } @Override - public DeleteObjectsRequest newBulkDeleteRequest( - List keysToDelete) { - 
return prepareRequest( - new DeleteObjectsRequest(bucket) - .withKeys(keysToDelete) - .withQuiet(true)); + public DeleteObjectsRequest.Builder newBulkDeleteRequestBuilder( + List keysToDelete) { + return prepareRequest(DeleteObjectsRequest + .builder() + .bucket(bucket) + .delete(d -> d.objects(keysToDelete).quiet(true))); } @Override @@ -628,23 +592,6 @@ public void setEncryptionSecrets(final EncryptionSecrets secrets) { encryptionSecrets = secrets; } - /** - * Set the metadata from the options if the options are not - * null and the metadata contains headers. - * @param options options for the request - * @param objectMetadata metadata to patch - */ - private void maybeSetMetadata( - @Nullable PutObjectOptions options, - final ObjectMetadata objectMetadata) { - if (options != null) { - Map headers = options.getHeaders(); - if (headers != null) { - objectMetadata.setUserMetadata(headers); - } - } - } - /** * Create a builder. * @return new builder. @@ -671,7 +618,7 @@ public static final class RequestFactoryBuilder { /** * ACL For new objects. */ - private CannedAccessControlList cannedACL = null; + private ObjectCannedACL cannedACL = null; /** Content Encoding. */ private String contentEncoding; @@ -754,7 +701,7 @@ public RequestFactoryBuilder withEncryptionSecrets( * @return the builder */ public RequestFactoryBuilder withCannedACL( - final CannedAccessControlList value) { + final ObjectCannedACL value) { cannedACL = value; return this; } @@ -806,11 +753,9 @@ public interface PrepareRequest { /** * Post-creation preparation of AWS request. - * @param t request - * @param request type. - * @return prepared entry. + * @param t request builder */ @Retries.OnceRaw - T prepareRequest(T t); + void prepareRequest(SdkRequest.Builder t); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java index 4ab5bc6a99245..b7eae8ead7096 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AMultipartUploader.java @@ -34,10 +34,12 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import org.apache.hadoop.thirdparty.com.google.common.base.Charsets; import org.apache.commons.lang3.StringUtils; @@ -152,18 +154,18 @@ public CompletableFuture putPart( Charsets.UTF_8); return context.submit(new CompletableFuture<>(), () -> { - UploadPartRequest request = writeOperations.newUploadPartRequest(key, - uploadIdString, partNumber, (int) lengthInBytes, inputStream, - null, 0L); - UploadPartResult result = writeOperations.uploadPart(request, statistics); + UploadPartRequest request = writeOperations.newUploadPartRequestBuilder(key, + uploadIdString, partNumber, lengthInBytes).build(); + RequestBody body = RequestBody.fromInputStream(inputStream, 
lengthInBytes); + UploadPartResponse response = writeOperations.uploadPart(request, body, statistics); statistics.partPut(lengthInBytes); - String eTag = result.getETag(); + String eTag = response.eTag(); return BBPartHandle.from( ByteBuffer.wrap( buildPartHandlePayload( filePath.toUri().toString(), uploadIdString, - result.getPartNumber(), + partNumber, eTag, lengthInBytes))); }); @@ -188,7 +190,7 @@ public CompletableFuture complete( String uploadIdStr = new String(uploadIdBytes, 0, uploadIdBytes.length, Charsets.UTF_8); - ArrayList eTags = new ArrayList<>(); + ArrayList eTags = new ArrayList<>(); eTags.ensureCapacity(handles.size()); long totalLength = 0; // built up to identify duplicates -if the size of this set is @@ -201,7 +203,8 @@ public CompletableFuture complete( payload.validate(uploadIdStr, filePath); ids.add(payload.getPartNumber()); totalLength += payload.getLen(); - eTags.add(new PartETag(handle.getKey(), payload.getEtag())); + eTags.add( + CompletedPart.builder().partNumber(handle.getKey()).eTag(payload.getEtag()).build()); } Preconditions.checkArgument(ids.size() == count, "Duplicate PartHandles"); @@ -210,7 +213,7 @@ public CompletableFuture complete( long finalLen = totalLength; return context.submit(new CompletableFuture<>(), trackDurationOfCallable(statistics, MULTIPART_UPLOAD_COMPLETED.getSymbol(), () -> { - CompleteMultipartUploadResult result = + CompleteMultipartUploadResponse result = writeOperations.commitUpload( key, uploadIdStr, @@ -218,7 +221,7 @@ public CompletableFuture complete( finalLen ); - byte[] eTag = result.getETag().getBytes(Charsets.UTF_8); + byte[] eTag = result.eTag().getBytes(Charsets.UTF_8); statistics.uploadCompleted(); return (PathHandle) () -> ByteBuffer.wrap(eTag); })); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java index b566f9ad42765..206d74e549d88 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java @@ -18,12 +18,9 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.Closeable; +import java.io.InputStream; import java.util.concurrent.atomic.AtomicBoolean; -import javax.annotation.Nullable; - -import com.amazonaws.internal.SdkFilterInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,23 +28,19 @@ import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import software.amazon.awssdk.http.Abortable; + import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration; -import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; /** * Drains/aborts s3 or other AWS SDK streams. * It is callable so can be passed directly to a submitter * for async invocation. - * A request object may be passed in; it will be implicitly - * cached until this object is GCd. - * This is because in some versions of the AWS SDK, the S3Object - * has a finalize() method which releases the http connection, - * even when the stream is still open. - * See HADOOP-17338 for details. 
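Aside: with the v1 finalize() hazard gone, the drainer below works directly on the v2 response stream, which implements Abortable. A usage sketch, assuming uri, remaining and stats are in scope (the class is a CallableRaisingIOE, so it can equally be handed to an executor):

    ResponseInputStream<GetObjectResponse> stream = s3Client.getObject(request);
    SDKStreamDrainer<ResponseInputStream<GetObjectResponse>> drainer =
        new SDKStreamDrainer<>(uri, stream, false, remaining, stats, "unbuffer");
    drainer.apply();  // drain remaining bytes, then close (or abort on failure)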
*/ -public class SDKStreamDrainer implements CallableRaisingIOE { +public class SDKStreamDrainer + implements CallableRaisingIOE { private static final Logger LOG = LoggerFactory.getLogger( SDKStreamDrainer.class); @@ -58,17 +51,9 @@ public class SDKStreamDrainer implements CallableRaisingIOE { private final String uri; /** - * Request object; usually S3Object - * Never used, but needed to keep the http connection - * open long enough for draining to take place. + * Stream from the getObject response for draining and closing. */ - @Nullable - private final Closeable requestObject; - - /** - * Stream from the {@link #requestObject} for draining and closing. - */ - private final SdkFilterInputStream sdkStream; + private final TStream sdkStream; /** * Should the request be aborted? @@ -118,7 +103,6 @@ public class SDKStreamDrainer implements CallableRaisingIOE { /** * Prepare to drain the stream. * @param uri URI for messages - * @param requestObject http request object; needed to avoid GC issues. * @param sdkStream stream to close. * @param shouldAbort force an abort; used if explicitly requested. * @param streamStatistics stats to update @@ -126,14 +110,12 @@ public class SDKStreamDrainer implements CallableRaisingIOE { * @param remaining remaining bytes */ public SDKStreamDrainer(final String uri, - @Nullable final Closeable requestObject, - final SdkFilterInputStream sdkStream, + final TStream sdkStream, final boolean shouldAbort, final int remaining, final S3AInputStreamStatistics streamStatistics, final String reason) { this.uri = uri; - this.requestObject = requestObject; this.sdkStream = requireNonNull(sdkStream); this.shouldAbort = shouldAbort; this.remaining = remaining; @@ -233,7 +215,6 @@ private boolean drainOrAbortHttpStream() { LOG.debug("Closing stream"); sdkStream.close(); - cleanupWithLogger(LOG, requestObject); // this MUST come after the close, so that if the IO operations fail // and an abort is triggered, the initial attempt's statistics // aren't collected. @@ -255,8 +236,6 @@ private boolean drainOrAbortHttpStream() { LOG.warn("When aborting {} stream after failing to close it for {}", uri, reason, e); thrown = e; - } finally { - cleanupWithLogger(LOG, requestObject); } streamStatistics.streamClose(true, remaining); @@ -269,11 +248,7 @@ public String getUri() { return uri; } - public Object getRequestObject() { - return requestObject; - } - - public SdkFilterInputStream getSdkStream() { + public TStream getSdkStream() { return sdkStream; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java index 3aa8ad270eedd..c9156f42047b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java @@ -47,6 +47,9 @@ private V2Migration() { } private static final LogExactlyOnce WARN_OF_CUSTOM_SIGNER = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); + private static final LogExactlyOnce WARN_OF_REQUEST_HANDLERS = + new LogExactlyOnce(SDK_V2_UPGRADE_LOG); + private static final LogExactlyOnce WARN_ON_GET_OBJECT_METADATA = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); @@ -87,6 +90,15 @@ public static void v1CustomSignerUsed() { + "once S3A is upgraded to SDK V2"); } + /** + * Warns on use of request handlers. 
+ */ + public static void v1RequestHandlersUsed() { + WARN_OF_REQUEST_HANDLERS.warn( + "The request handler interface has changed in AWS SDK V2, use exception interceptors " + + "once S3A is upgraded to SDK V2"); + } + /** * Warns on use of getObjectMetadata. */ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java index 3ab0022bb082e..65b5a4235133b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java @@ -19,15 +19,8 @@ package org.apache.hadoop.fs.s3a.prefetch; - import java.io.IOException; -import java.io.InputStream; -import java.util.IdentityHashMap; -import java.util.Map; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,12 +28,17 @@ import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.S3AReadOpContext; +import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.impl.ChangeTracker; import org.apache.hadoop.fs.s3a.impl.SDKStreamDrainer; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.fs.statistics.DurationTracker; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + /** * Encapsulates low level interactions with S3 object on AWS. */ @@ -74,12 +72,6 @@ public class S3ARemoteObject { */ private final ChangeTracker changeTracker; - /** - * Maps a stream returned by openForRead() to the associated S3 object. - * That allows us to close the object when closing the stream. - */ - private final Map s3Objects; - /** * uri of the object being read. */ @@ -123,7 +115,6 @@ public S3ARemoteObject( this.client = client; this.streamStatistics = streamStatistics; this.changeTracker = changeTracker; - this.s3Objects = new IdentityHashMap<>(); this.uri = this.getPath(); } @@ -187,21 +178,23 @@ public long size() { * @throws IllegalArgumentException if offset is greater than or equal to file size. * @throws IllegalArgumentException if size is greater than the remaining bytes. 
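Aside: stripped of the S3A plumbing, the rewritten openForRead() below issues a plain v2 ranged GET. The builder methods are real SDK v2 API; S3AUtils.formatRange is the helper this patch introduces, presumably emitting the standard bytes=start-end form:

    GetObjectRequest request = GetObjectRequest.builder()
        .bucket("bucket").key("key")
        .range("bytes=0-1023")   // what formatRange(0, 1023) is expected to produce
        .build();
    ResponseInputStream<GetObjectResponse> in = s3Client.getObject(request);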
*/ - public InputStream openForRead(long offset, int size) throws IOException { + public ResponseInputStream openForRead(long offset, int size) + throws IOException { Validate.checkNotNegative(offset, "offset"); Validate.checkLessOrEqual(offset, "offset", size(), "size()"); Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset"); streamStatistics.streamOpened(); - final GetObjectRequest request = - client.newGetRequest(s3Attributes.getKey()) - .withRange(offset, offset + size - 1); - changeTracker.maybeApplyConstraint(request); + final GetObjectRequest request = client + .newGetRequestBuilder(s3Attributes.getKey()) + .range(S3AUtils.formatRange(offset, offset + size - 1)) + .applyMutation(changeTracker::maybeApplyConstraint) + .build(); String operation = String.format( "%s %s at %d", S3AInputStream.OPERATION_OPEN, uri, offset); DurationTracker tracker = streamStatistics.initiateGetRequest(); - S3Object object = null; + ResponseInputStream object = null; try { object = Invoker.once(operation, uri, () -> client.getObject(request)); @@ -212,27 +205,14 @@ public InputStream openForRead(long offset, int size) throws IOException { tracker.close(); } - changeTracker.processResponse(object, operation, offset); - InputStream stream = object.getObjectContent(); - synchronized (s3Objects) { - s3Objects.put(stream, object); - } - - return stream; + changeTracker.processResponse(object.response(), operation, offset); + return object; } - void close(InputStream inputStream, int numRemainingBytes) { - S3Object obj; - synchronized (s3Objects) { - obj = s3Objects.remove(inputStream); - if (obj == null) { - throw new IllegalArgumentException("inputStream not found"); - } - } + void close(ResponseInputStream inputStream, int numRemainingBytes) { SDKStreamDrainer drainer = new SDKStreamDrainer( uri, - obj, - (S3ObjectInputStream)inputStream, + inputStream, false, numRemainingBytes, streamStatistics, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java index 89ea77d6d0ebb..b49b2699f916b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObjectReader.java @@ -22,7 +22,6 @@ import java.io.Closeable; import java.io.EOFException; import java.io.IOException; -import java.io.InputStream; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; @@ -33,6 +32,9 @@ import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_REMOTE_BLOCK_READ; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; @@ -144,7 +146,8 @@ private void readOneBlock(ByteBuffer buffer, long offset, int size) return; } - InputStream inputStream = remoteObject.openForRead(offset, readSize); + ResponseInputStream inputStream = + remoteObject.openForRead(offset, readSize); int numRemainingBytes = readSize; byte[] bytes = new byte[READ_BUFFER_SIZE]; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 608f9168c24cc..63913afac79b1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -33,11 +33,11 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.MultipartUpload; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.MultipartUpload; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -694,11 +694,11 @@ private void processUploads(PrintStream out) throws IOException { count++; if (mode == Mode.ABORT || mode == Mode.LIST || verbose) { println(out, "%s%s %s", mode == Mode.ABORT ? "Deleting: " : "", - upload.getKey(), upload.getUploadId()); + upload.key(), upload.uploadId()); } if (mode == Mode.ABORT) { writeOperationHelper - .abortMultipartUpload(upload.getKey(), upload.getUploadId(), + .abortMultipartUpload(upload.key(), upload.uploadId(), true, LOG_EVENT); } } @@ -726,7 +726,7 @@ private boolean olderThan(MultipartUpload u, long msec) { return true; } Date ageDate = new Date(System.currentTimeMillis() - msec); - return ageDate.compareTo(u.getInitiated()) >= 0; + return ageDate.compareTo(Date.from(u.initiated())) >= 0; } private void processArgs(List args, PrintStream out) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java new file mode 100644 index 0000000000000..6ff195609cb10 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.Enumeration; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.LinkedBlockingQueue; + +import org.reactivestreams.Subscriber; +import org.reactivestreams.Subscription; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; + +/** + * Implements the {@link Enumeration} interface by subscribing to a + * {@link SdkPublisher} instance. The enumeration will buffer a fixed + * number of elements and only request new ones from the publisher + * when they are consumed. 
Calls to {@link #hasMoreElements()} and + * {@link #nextElement()} may block while waiting for new elements. + * @param the type of element. + */ +public final class BlockingEnumeration implements Enumeration { + private static final class Signal { + public final T element; + public final Throwable error; + + public Signal(T element) { + this.element = element; + this.error = null; + } + + public Signal(Throwable error) { + this.element = null; + this.error = error; + } + } + + private final Signal END_SIGNAL = new Signal<>((Throwable)null); + private final CompletableFuture subscription = new CompletableFuture<>(); + private final BlockingQueue> signalQueue; + private final int bufferSize; + private Signal current = null; + + /** + * Create an enumeration with a fixed buffer size and an + * optional injected first element. + * @param publisher the publisher feeding the enumeration. + * @param bufferSize the buffer size. + * @param firstElement (optional) first element the enumeration will return. + */ + public BlockingEnumeration(SdkPublisher publisher, + final int bufferSize, + final T firstElement) { + this.signalQueue = new LinkedBlockingQueue<>(); + this.bufferSize = bufferSize; + if (firstElement != null) { + this.current = new Signal<>(firstElement); + } + publisher.subscribe(new EnumerationSubscriber()); + } + + /** + * Create an enumeration with a fixed buffer size. + * @param publisher the publisher feeding the enumeration. + * @param bufferSize the buffer size. + */ + public BlockingEnumeration(SdkPublisher publisher, + final int bufferSize) { + this(publisher, bufferSize, null); + } + + @Override + public boolean hasMoreElements() { + if (current == null) { + try { + current = signalQueue.take(); + } catch (InterruptedException e) { + current = new Signal<>(e); + subscription.thenAccept(Subscription::cancel); + } + } + if (current.error != null) { + if (current.error instanceof SdkException) { + throw (SdkException)current.error; + } else { + throw SdkException.create("Unexpected error", current.error); + } + } + return current != END_SIGNAL; + } + + @Override + public T nextElement() { + if (!hasMoreElements()) { + throw new NoSuchElementException(); + } + T element = current.element; + current = null; + subscription.thenAccept(s -> s.request(1)); + return element; + } + + private final class EnumerationSubscriber implements Subscriber { + + @Override + public void onSubscribe(Subscription s) { + long request = bufferSize; + if (current != null) { + request--; + } + if (request > 0) { + s.request(request); + } + subscription.complete(s); + } + + @Override + public void onNext(T t) { + signalQueue.add(new Signal<>(t)); + } + + @Override + public void onError(Throwable t) { + signalQueue.add(new Signal<>(t)); + } + + @Override + public void onComplete() { + signalQueue.add(END_SIGNAL); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java index 9c79cc1004ce1..95cad54338344 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java @@ -21,13 +21,6 @@ import java.io.IOException; import java.util.Locale; -import com.amazonaws.services.s3.model.CSVInput; -import com.amazonaws.services.s3.model.CSVOutput; -import com.amazonaws.services.s3.model.ExpressionType; -import 
com.amazonaws.services.s3.model.InputSerialization; -import com.amazonaws.services.s3.model.OutputSerialization; -import com.amazonaws.services.s3.model.QuoteFields; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,6 +35,14 @@ import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.WriteOperationHelper; +import software.amazon.awssdk.services.s3.model.CSVInput; +import software.amazon.awssdk.services.s3.model.CSVOutput; +import software.amazon.awssdk.services.s3.model.ExpressionType; +import software.amazon.awssdk.services.s3.model.InputSerialization; +import software.amazon.awssdk.services.s3.model.OutputSerialization; +import software.amazon.awssdk.services.s3.model.QuoteFields; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; + import static org.apache.hadoop.util.Preconditions.checkNotNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; @@ -145,9 +146,9 @@ public SelectObjectContentRequest buildSelectRequest( Preconditions.checkState(isEnabled(), "S3 Select is not enabled for %s", path); - SelectObjectContentRequest request = operations.newSelectRequest(path); + SelectObjectContentRequest.Builder request = operations.newSelectRequestBuilder(path); buildRequest(request, expression, builderOptions); - return request; + return request.build(); } /** @@ -175,14 +176,14 @@ private SelectInputStream executeSelect( } boolean sqlInErrors = builderOptions.getBoolean(SELECT_ERRORS_INCLUDE_SQL, errorsIncludeSql); - String expression = request.getExpression(); + String expression = request.expression(); final String errorText = sqlInErrors ? expression : "Select"; if (sqlInErrors) { LOG.info("Issuing SQL request {}", expression); } + SelectEventStreamPublisher selectPublisher = operations.select(path, request, errorText); return new SelectInputStream(readContext, - objectAttributes, - operations.select(path, request, errorText)); + objectAttributes, selectPublisher); } /** @@ -197,14 +198,14 @@ private SelectInputStream executeSelect( *

  • The default values in {@link SelectConstants}
  • * * - * @param request request to build up + * @param requestBuilder request to build up * @param expression SQL expression * @param builderOptions the options which came in from the openFile builder. * @throws IllegalArgumentException if an option is somehow invalid. * @throws IOException if an option is somehow invalid. */ void buildRequest( - final SelectObjectContentRequest request, + final SelectObjectContentRequest.Builder requestBuilder, final String expression, final Configuration builderOptions) throws IllegalArgumentException, IOException { @@ -213,7 +214,6 @@ void buildRequest( final Configuration ownerConf = operations.getConf(); - String inputFormat = builderOptions.get(SELECT_INPUT_FORMAT, SELECT_FORMAT_CSV).toLowerCase(Locale.ENGLISH); Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(inputFormat), @@ -224,34 +224,24 @@ void buildRequest( Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(outputFormat), "Unsupported output format %s", outputFormat); - request.setExpressionType(ExpressionType.SQL); - request.setExpression(expandBackslashChars(expression)); - - InputSerialization inputSerialization = buildCsvInputRequest(ownerConf, - builderOptions); - String compression = opt(builderOptions, - ownerConf, - SELECT_INPUT_COMPRESSION, - COMPRESSION_OPT_NONE, - true).toUpperCase(Locale.ENGLISH); - if (isNotEmpty(compression)) { - inputSerialization.setCompressionType(compression); - } - request.setInputSerialization(inputSerialization); - - request.setOutputSerialization(buildCSVOutput(ownerConf, builderOptions)); + requestBuilder.expressionType(ExpressionType.SQL); + requestBuilder.expression(expandBackslashChars(expression)); + requestBuilder.inputSerialization( + buildCsvInput(ownerConf, builderOptions)); + requestBuilder.outputSerialization( + buildCSVOutput(ownerConf, builderOptions)); } /** - * Build the CSV input request. + * Build the CSV input format for a request. 
* @param ownerConf FS owner configuration * @param builderOptions options on the specific request - * @return the constructed request + * @return the input format * @throws IllegalArgumentException argument failure * @throws IOException validation failure */ - public InputSerialization buildCsvInputRequest( + public InputSerialization buildCsvInput( final Configuration ownerConf, final Configuration builderOptions) throws IllegalArgumentException, IOException { @@ -283,28 +273,35 @@ public InputSerialization buildCsvInputRequest( CSV_INPUT_QUOTE_ESCAPE_CHARACTER_DEFAULT); // CSV input - CSVInput csv = new CSVInput(); - csv.setFieldDelimiter(fieldDelimiter); - csv.setRecordDelimiter(recordDelimiter); - csv.setComments(commentMarker); - csv.setQuoteCharacter(quoteCharacter); + CSVInput.Builder csvBuilder = CSVInput.builder() + .fieldDelimiter(fieldDelimiter) + .recordDelimiter(recordDelimiter) + .comments(commentMarker) + .quoteCharacter(quoteCharacter); if (StringUtils.isNotEmpty(quoteEscapeCharacter)) { - csv.setQuoteEscapeCharacter(quoteEscapeCharacter); + csvBuilder.quoteEscapeCharacter(quoteEscapeCharacter); } - csv.setFileHeaderInfo(headerInfo); - - InputSerialization inputSerialization = new InputSerialization(); - inputSerialization.setCsv(csv); - - return inputSerialization; + csvBuilder.fileHeaderInfo(headerInfo); + InputSerialization.Builder inputSerialization = + InputSerialization.builder() + .csv(csvBuilder.build()); + String compression = opt(builderOptions, + ownerConf, + SELECT_INPUT_COMPRESSION, + COMPRESSION_OPT_NONE, + true).toUpperCase(Locale.ENGLISH); + if (isNotEmpty(compression)) { + inputSerialization.compressionType(compression); + } + return inputSerialization.build(); } /** - * Build CSV output for a request. + * Build CSV output format for a request. 
* @param ownerConf FS owner configuration * @param builderOptions options on the specific request - * @return the constructed request + * @return the output format * @throws IllegalArgumentException argument failure * @throws IOException validation failure */ @@ -333,21 +330,19 @@ public OutputSerialization buildCSVOutput( CSV_OUTPUT_QUOTE_FIELDS, CSV_OUTPUT_QUOTE_FIELDS_ALWAYS).toUpperCase(Locale.ENGLISH); - // output is CSV, always - OutputSerialization outputSerialization - = new OutputSerialization(); - CSVOutput csvOut = new CSVOutput(); - csvOut.setQuoteCharacter(quoteCharacter); - csvOut.setQuoteFields( - QuoteFields.fromValue(quoteFields)); - csvOut.setFieldDelimiter(fieldDelimiter); - csvOut.setRecordDelimiter(recordDelimiter); + CSVOutput.Builder csvOutputBuilder = CSVOutput.builder() + .quoteCharacter(quoteCharacter) + .quoteFields(QuoteFields.fromValue(quoteFields)) + .fieldDelimiter(fieldDelimiter) + .recordDelimiter(recordDelimiter); if (!quoteEscapeCharacter.isEmpty()) { - csvOut.setQuoteEscapeCharacter(quoteEscapeCharacter); + csvOutputBuilder.quoteEscapeCharacter(quoteEscapeCharacter); } - outputSerialization.setCsv(csvOut); - return outputSerialization; + // output is CSV, always + return OutputSerialization.builder() + .csv(csvOutputBuilder.build()) + .build(); } /** @@ -359,18 +354,18 @@ public OutputSerialization buildCSVOutput( public static String toString(final SelectObjectContentRequest request) { StringBuilder sb = new StringBuilder(); sb.append("SelectObjectContentRequest{") - .append("bucket name=").append(request.getBucketName()) - .append("; key=").append(request.getKey()) - .append("; expressionType=").append(request.getExpressionType()) - .append("; expression=").append(request.getExpression()); - InputSerialization input = request.getInputSerialization(); + .append("bucket name=").append(request.bucket()) + .append("; key=").append(request.key()) + .append("; expressionType=").append(request.expressionType()) + .append("; expression=").append(request.expression()); + InputSerialization input = request.inputSerialization(); if (input != null) { sb.append("; Input") .append(input.toString()); } else { sb.append("; Input Serialization: none"); } - OutputSerialization out = request.getOutputSerialization(); + OutputSerialization out = request.outputSerialization(); if (out != null) { sb.append("; Output") .append(out.toString()); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java new file mode 100644 index 0000000000000..c71ea5f1623a1 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.SequenceInputStream; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +import org.reactivestreams.Subscriber; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.EndEvent; +import software.amazon.awssdk.services.s3.model.RecordsEvent; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; +import software.amazon.awssdk.utils.ToString; + +/** + * Async publisher of {@link SelectObjectContentEventStream}s returned + * from a SelectObjectContent call. + */ +public final class SelectEventStreamPublisher implements + SdkPublisher { + + private final CompletableFuture selectOperationFuture; + private final SelectObjectContentResponse response; + private final SdkPublisher publisher; + + /** + * Create the publisher. + * @param selectOperationFuture SelectObjectContent future + * @param response SelectObjectContent response + * @param publisher SelectObjectContentEventStream publisher to wrap + */ + public SelectEventStreamPublisher( + CompletableFuture selectOperationFuture, + SelectObjectContentResponse response, + SdkPublisher publisher) { + this.selectOperationFuture = selectOperationFuture; + this.response = response; + this.publisher = publisher; + } + + /** + * Retrieve an input stream to the subset of the S3 object that matched the select query. + * This is equivalent to loading the content of all RecordsEvents into an InputStream. + * This will lazily-load the content from S3, minimizing the amount of memory used. + * @param onEndEvent callback on the end event + * @return the input stream + */ + public AbortableInputStream toRecordsInputStream(Consumer onEndEvent) { + SdkPublisher recordInputStreams = this.publisher + .filter(e -> { + if (e instanceof RecordsEvent) { + return true; + } else if (e instanceof EndEvent) { + onEndEvent.accept((EndEvent) e); + } + return false; + }) + .map(e -> ((RecordsEvent) e).payload().asInputStream()); + + // Subscribe to the async publisher using an enumeration that will + // buffer a single chunk (RecordsEvent's payload) at a time and + // block until it is consumed. + // Also inject an empty stream as the first element that + // SequenceInputStream will request on construction. + BlockingEnumeration enumeration = + new BlockingEnumeration(recordInputStreams, 1, EMPTY_STREAM); + return AbortableInputStream.create( + new SequenceInputStream(enumeration), + this::cancel); + } + + /** + * The response from the SelectObjectContent call. + * @return the response object + */ + public SelectObjectContentResponse response() { + return response; + } + + @Override + public void subscribe(Subscriber subscriber) { + publisher.subscribe(subscriber); + } + + /** + * Cancel the operation. 
+ */ + public void cancel() { + selectOperationFuture.cancel(true); + } + + @Override + public String toString() { + return ToString.builder("SelectObjectContentEventStream") + .add("response", response) + .add("publisher", publisher) + .build(); + } + + private static final InputStream EMPTY_STREAM = + new ByteArrayInputStream(new byte[0]); +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java index f6ae52eba5346..a2f5f28dc4c87 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java @@ -23,12 +23,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AbortedException; -import com.amazonaws.services.s3.model.SelectObjectContentEvent; -import com.amazonaws.services.s3.model.SelectObjectContentEventVisitor; -import com.amazonaws.services.s3.model.SelectObjectContentResult; -import com.amazonaws.services.s3.model.SelectRecordsInputStream; import org.apache.hadoop.util.Preconditions; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +40,9 @@ import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.io.IOUtils; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.http.AbortableInputStream; + import static org.apache.hadoop.util.Preconditions.checkNotNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.s3a.Invoker.once; @@ -93,7 +92,7 @@ public class SelectInputStream extends FSInputStream implements * Abortable response stream. * This is guaranteed to never be null. */ - private final SelectRecordsInputStream wrappedStream; + private final AbortableInputStream wrappedStream; private final String bucket; @@ -112,14 +111,14 @@ public class SelectInputStream extends FSInputStream implements * The read attempt is initiated immediately. 
* @param readContext read context * @param objectAttributes object attributes from a HEAD request - * @param selectResponse response from the already executed call + * @param selectPublisher event stream publisher from the already executed call * @throws IOException failure */ @Retries.OnceTranslated public SelectInputStream( final S3AReadOpContext readContext, final S3ObjectAttributes objectAttributes, - final SelectObjectContentResult selectResponse) throws IOException { + final SelectEventStreamPublisher selectPublisher) throws IOException { Preconditions.checkArgument(isNotEmpty(objectAttributes.getBucket()), "No Bucket"); Preconditions.checkArgument(isNotEmpty(objectAttributes.getKey()), @@ -132,17 +131,17 @@ public SelectInputStream( this.readahead = readContext.getReadahead(); this.streamStatistics = readContext.getS3AStatisticsContext() .newInputStreamStatistics(); - SelectRecordsInputStream stream = once( + + AbortableInputStream stream = once( "S3 Select", uri, - () -> selectResponse.getPayload() - .getRecordsInputStream(new SelectObjectContentEventVisitor() { - @Override - public void visit(final SelectObjectContentEvent.EndEvent event) { - LOG.debug("Completed successful S3 select read from {}", uri); - completedSuccessfully.set(true); - } - })); + () -> { + return selectPublisher.toRecordsInputStream(e -> { + LOG.debug("Completed successful S3 select read from {}", uri); + completedSuccessfully.set(true); + }); + }); + this.wrappedStream = checkNotNull(stream); // this stream is already opened, so mark as such in the statistics. streamStatistics.streamOpened(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java new file mode 100644 index 0000000000000..c08793defaaff --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; + +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AUtils; + +import static org.apache.hadoop.fs.s3a.WriteOperationHelper.WriteOperationHelperCallbacks; + +/** + * Helper for SelectObjectContent queries against an S3 Bucket. + */ +public final class SelectObjectContentHelper { + + /** + * Execute an S3 Select operation. + * @param writeOperationHelperCallbacks helper callbacks + * @param source source for selection + * @param request Select request to issue. + * @param action the action for use in exception creation + * @return the select response event stream publisher + * @throws IOException on failure + */ + public static SelectEventStreamPublisher select( + WriteOperationHelperCallbacks writeOperationHelperCallbacks, + Path source, + SelectObjectContentRequest request, + String action) + throws IOException { + try { + Handler handler = new Handler(); + CompletableFuture selectOperationFuture = + writeOperationHelperCallbacks.selectObjectContent(request, handler); + return handler.eventPublisher(selectOperationFuture).join(); + } catch (Throwable e) { + if (e instanceof CompletionException) { + e = e.getCause(); + } + IOException translated; + if (e instanceof SdkException) { + translated = S3AUtils.translateException(action, source, + (SdkException)e); + } else { + translated = new IOException(e); + } + throw translated; + } + } + + private static class Handler implements SelectObjectContentResponseHandler { + private volatile CompletableFuture>> responseAndPublisherFuture = + new CompletableFuture<>(); + + private volatile SelectObjectContentResponse response; + + public CompletableFuture eventPublisher( + CompletableFuture selectOperationFuture) { + return responseAndPublisherFuture.thenApply(p -> + new SelectEventStreamPublisher(selectOperationFuture, + p.getLeft(), p.getRight())); + } + + @Override + public void responseReceived(SelectObjectContentResponse response) { + this.response = response; + } + + @Override + public void onEventStream(SdkPublisher publisher) { + responseAndPublisherFuture.complete(Pair.of(response, publisher)); + } + + @Override + public void exceptionOccurred(Throwable error) { + responseAndPublisherFuture.completeExceptionally(error); + } + + @Override + public void complete() { + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java index c002a4a6dee1d..188bb83ddadab 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java @@ -21,23 +21,18 @@ import java.time.Duration; import java.util.function.Consumer; import 
java.util.function.LongConsumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.util.TimingInfo; +import software.amazon.awssdk.core.metrics.CoreMetric; +import software.amazon.awssdk.http.HttpMetric; +import software.amazon.awssdk.http.HttpStatusCode; +import software.amazon.awssdk.metrics.MetricCollection; +import software.amazon.awssdk.metrics.MetricPublisher; +import software.amazon.awssdk.metrics.SdkMetric; import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; -import static com.amazonaws.util.AWSRequestMetrics.Field.ClientExecuteTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientRetryCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpRequestTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestMarshallTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestSigningTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ResponseProcessingTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ThrottleException; - /** * Collect statistics from the AWS SDK and forward to an instance of * {@link StatisticsFromAwsSdk} and thence into the S3A statistics. @@ -45,9 +40,9 @@ * See {@code com.facebook.presto.hive.s3.PrestoS3FileSystemMetricCollector} * for the inspiration for this. *

    - * See {@code com.amazonaws.util.AWSRequestMetrics} for metric names. + * See {@code software.amazon.awssdk.core.metrics.CoreMetric} for metric names. */ -public class AwsStatisticsCollector extends RequestMetricCollector { +public class AwsStatisticsCollector implements MetricPublisher { /** * final destination of updates. @@ -65,65 +60,122 @@ public AwsStatisticsCollector(final StatisticsFromAwsSdk collector) { /** * This is the callback from the AWS SDK where metrics * can be collected. - * @param request AWS request - * @param response AWS response + * @param metricCollection metrics collection */ @Override - public void collectMetrics( - final Request request, - final Response response) { - - TimingInfo timingInfo = request.getAWSRequestMetrics().getTimingInfo(); - - counter(timingInfo, HttpClientRetryCount.name(), - collector::updateAwsRetryCount); - counter(timingInfo, RequestCount.name(), - collector::updateAwsRequestCount); - counter(timingInfo, ThrottleException.name(), - collector::updateAwsThrottleExceptionsCount); - - timing(timingInfo, ClientExecuteTime.name(), - collector::noteAwsClientExecuteTime); - timing(timingInfo, HttpRequestTime.name(), - collector::noteAwsRequestTime); - timing(timingInfo, RequestMarshallTime.name(), - collector::noteRequestMarshallTime); - timing(timingInfo, RequestSigningTime.name(), - collector::noteRequestSigningTime); - timing(timingInfo, ResponseProcessingTime.name(), - collector::noteResponseProcessingTime); + public void publish(MetricCollection metricCollection) { + // MetricCollections are nested, so we need to traverse through their + // "children" to collect the desired metrics. E.g.: + // + // ApiCall + // ┌─────────────────────────────────────────┐ + // │ MarshallingDuration=PT0.002808333S │ + // │ RetryCount=0 │ + // │ ApiCallSuccessful=true │ + // │ OperationName=DeleteObject │ + // │ ApiCallDuration=PT0.079801458S │ + // │ CredentialsFetchDuration=PT0.000007083S │ + // │ ServiceId=S3 │ + // └─────────────────────────────────────────┘ + // ApiCallAttempt + // ┌─────────────────────────────────────────────────────────────────┐ + // │ SigningDuration=PT0.000319375S │ + // │ ServiceCallDuration=PT0.078908584S │ + // │ AwsExtendedRequestId=Kmvb2Sz8NuDgIFJPKzLLBhuHgQGmpAjVYBMrSHDvy= │ + // │ HttpStatusCode=204 │ + // │ BackoffDelayDuration=PT0S │ + // │ AwsRequestId=KR0XZCSX │ + // └─────────────────────────────────────────────────────────────────┘ + // HttpClient + // ┌─────────────────────────────────┐ + // │ AvailableConcurrency=1 │ + // │ LeasedConcurrency=0 │ + // │ ConcurrencyAcquireDuration=PT0S │ + // │ PendingConcurrencyAcquires=0 │ + // │ MaxConcurrency=96 │ + // │ HttpClientName=Apache │ + // └─────────────────────────────────┘ + + final long[] throttling = {0}; + recurseThroughChildren(metricCollection) + .collect(Collectors.toList()) + .forEach(m -> { + counter(m, CoreMetric.RETRY_COUNT, retries -> { + collector.updateAwsRetryCount(retries); + collector.updateAwsRequestCount(retries + 1); + }); + + counter(m, HttpMetric.HTTP_STATUS_CODE, statusCode -> { + if (statusCode == HttpStatusCode.THROTTLING) { + throttling[0] += 1; + } + }); + + timing(m, CoreMetric.API_CALL_DURATION, + collector::noteAwsClientExecuteTime); + + timing(m, CoreMetric.SERVICE_CALL_DURATION, + collector::noteAwsRequestTime); + + timing(m, CoreMetric.MARSHALLING_DURATION, + collector::noteRequestMarshallTime); + + timing(m, CoreMetric.SIGNING_DURATION, + collector::noteRequestSigningTime); + + timing(m, CoreMetric.UNMARSHALLING_DURATION, + 
collector::noteResponseProcessingTime); + }); + + collector.updateAwsThrottleExceptionsCount(throttling[0]); + } + + @Override + public void close() { + } /** * Process a timing. - * @param timingInfo timing info - * @param subMeasurementName sub measurement + * @param collection metric collection + * @param metric metric * @param durationConsumer consumer */ private void timing( - TimingInfo timingInfo, - String subMeasurementName, + MetricCollection collection, + SdkMetric metric, Consumer durationConsumer) { - TimingInfo t1 = timingInfo.getSubMeasurement(subMeasurementName); - if (t1 != null && t1.getTimeTakenMillisIfKnown() != null) { - durationConsumer.accept(Duration.ofMillis( - t1.getTimeTakenMillisIfKnown().longValue())); - } + collection + .metricValues(metric) + .forEach(v -> durationConsumer.accept(v)); } /** * Process a counter. - * @param timingInfo timing info - * @param subMeasurementName sub measurement + * @param collection metric collection + * @param metric metric * @param consumer consumer */ private void counter( - TimingInfo timingInfo, - String subMeasurementName, + MetricCollection collection, + SdkMetric metric, LongConsumer consumer) { - Number n = timingInfo.getCounter(subMeasurementName); - if (n != null) { - consumer.accept(n.longValue()); - } + collection + .metricValues(metric) + .forEach(v -> consumer.accept(v.longValue())); + } + + /** + * Metric collections can be nested. Exposes a stream of the given + * collection and its nested children. + * @param metrics initial collection + * @return a stream of all nested metric collections + */ + private static Stream recurseThroughChildren( + MetricCollection metrics) { + return Stream.concat( + Stream.of(metrics), + metrics.children().stream() + .flatMap(c -> recurseThroughChildren(c))); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index 4ddc5f9478bb1..58078d1c6e04c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -32,10 +32,8 @@ import java.util.Map; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,6 +59,9 @@ import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.ExitUtil; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE_DEFAULT; @@ -784,7 +785,7 @@ long getTotalDeleteRequestDuration() { private MarkerPurgeSummary purgeMarkers( final DirMarkerTracker tracker, final int deletePageSize) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { MarkerPurgeSummary summary = new MarkerPurgeSummary(); // we get a map of surplus markers to delete. 
@@ -792,13 +793,13 @@ private MarkerPurgeSummary purgeMarkers( = tracker.getSurplusMarkers(); int size = markers.size(); // build a list from the strings in the map - List collect = + List collect = markers.values().stream() - .map(p -> new DeleteObjectsRequest.KeyVersion(p.getKey())) + .map(p -> ObjectIdentifier.builder().key(p.getKey()).build()) .collect(Collectors.toList()); // build an array list for ease of creating the lists of // keys in each page through the subList() method. - List markerKeys = + List markerKeys = new ArrayList<>(collect); // now randomize. Why so? if the list spans multiple S3 partitions, @@ -819,7 +820,7 @@ pages, suffix(pages), while (start < size) { // end is one past the end of the page int end = Math.min(start + deletePageSize, size); - List page = markerKeys.subList(start, + List page = markerKeys.subList(start, end); once("Remove S3 Keys", tracker.getBasePath().toString(), () -> diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java index a701f86f7b0c3..869afddd5582a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java @@ -21,16 +21,16 @@ import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; - import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + /** * Operations which must be offered by the store for {@link MarkerTool}. * These are a proper subset of {@code OperationCallbacks}; this interface @@ -62,14 +62,14 @@ RemoteIterator listObjects( * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. - * @throws AmazonClientException amazon-layer failure. + * @throws AwsServiceException amazon-layer failure. * @throws IOException other IO Exception. 
*/ @Retries.RetryMixed void removeKeys( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java index ccf80e1dde00e..1ffd2b7d4997b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java @@ -21,15 +21,15 @@ import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + /** * Implement the marker tool operations by forwarding to the * {@link OperationCallbacks} instance provided in the constructor. @@ -55,9 +55,9 @@ public RemoteIterator listObjects(final Path path, @Override public void removeKeys( - final List keysToDelete, + final List keysToDelete, final boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, IOException { + throws MultiObjectDeleteException, AwsServiceException, IOException { operationCallbacks.removeKeys(keysToDelete, deleteFakeDir ); } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md new file mode 100644 index 0000000000000..fa3347bc686c2 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md @@ -0,0 +1,340 @@ + + +# Upgrade S3A to AWS SDK V2: Changelog + +Note: This document is not meant to be committed as part of the final merge, and instead just serves +as a guide to help with reviewing the PR. + +This document tracks changes to S3A during the upgrade to AWS SDK V2. Once the upgrade +is complete, some of its content will be added to the existing document +[Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). + +This work is tracked in [HADOOP-18073](https://issues.apache.org/jira/browse/HADOOP-18073). + +## Contents + +* [Client Configuration](#client-configuration) +* [Endpoint and region configuration](#endpoint-and-region-configuration) +* [List Object](#list-object) +* [EncryptionSecretOperations](#encryptionsecretoperations) +* [GetObjectMetadata](#getobjectmetadata) +* [PutObject](#putobject) +* [CopyObject](#copyobject) +* [MultipartUpload](#multipartupload) +* [GetObject](#getObject) +* [DeleteObject](#deleteobject) +* [Select](#select) +* [CredentialsProvider](#credentialsprovider) +* [Auditing](#auditing) +* [Metric Collection](#metric-collection) +* [Exception Handling](#exception-handling) +* [Failure Injection](#failure-injection) + +### Client Configuration: + +* We now have two clients, a sync S3 Client and an async S3 Client. 
The async S3 client is required
+  as the select operation is currently only supported on the async client. Once we are confident in
+  the current set of changes, we will also be exploring moving other operations to the async client,
+  as this could provide potential performance benefits. However, those changes are not in the scope
+  of this PR, and will be done separately.
+* The [createAwsConf](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java#L1190)
+method is now split into:
+  ```
+  createClientConfigBuilder // sets request timeout, user agent*
+  createHttpClientBuilder* // sets max connections, connection timeout, socket timeout
+  createProxyConfigurationBuilder // sets proxy config, defined in table below
+  ```
+
+The table below lists the configurations S3A was using and what they now map to.
+
+|SDK V1 |SDK V2 |
+|--- |--- |
+|setMaxConnections |httpClientBuilder.maxConnections |
+|setProtocol |The protocol is now HTTPS by default, and can only be modified by setting an HTTP endpoint on the client builder. This is done when setting the endpoint in getS3Endpoint() |
+|setMaxErrorRetry |createRetryPolicyBuilder |
+|setConnectionTimeout |httpClientBuilder.connectionTimeout |
+|setSocketTimeout |httpClientBuilder.socketTimeout |
+|setRequestTimeout |overrideConfigBuilder.apiCallAttemptTimeout |
+|setSocketBufferSizeHints |Not supported |
+|setSignerOverride |Not done yet |
+|setProxyHost |proxyConfigBuilder.endpoint |
+|setProxyPort |set when setting proxy host with .endpoint |
+|setProxyUsername |proxyConfigBuilder.username |
+|setProxyPassword |proxyConfigBuilder.password |
+|setProxyDomain |proxyConfigBuilder.ntlmDomain, not supported in async client |
+|setProxyWorkstation |proxyConfigBuilder.ntlmWorkstation, not supported in async client |
+|setUserAgentPrefix |overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); |
+|addHeader |overrideConfigBuilder.putHeader |
+|setUseThrottleRetries |not supported |
+
+### Endpoint and region configuration
+
+Previously, if no endpoint and region were configured, S3A fell back to using us-east-1, setting
+withForceGlobalBucketAccessEnabled(true), which allowed access to buckets not in this region too.
+Since SDK V2 no longer supports cross-region access, we need to set the region and endpoint of
+the bucket. The behaviour has now been changed to:
+
+* If no endpoint is specified, use s3.amazonaws.com.
+* When setting the endpoint, also set the protocol (HTTP or HTTPS).
+* When setting the region, first instantiate a default S3 client with region eu-west-2, and call
+  headBucket using this client. If the bucket is also in eu-west-2, this will return a successful
+  response. Otherwise it will throw an error with status code 301 (Moved Permanently). This error
+  contains the region of the bucket in a response header, which we can then use to configure the client.
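+
+For illustration, the probe can be sketched roughly as follows (this is not the exact factory
+code; `probeBucketRegion` is a made-up name for this example):
+
+```java
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.S3Exception;
+
+final class RegionProbe {
+  /** Probe a bucket's region by calling headBucket against a default region. */
+  static String probeBucketRegion(String bucket) {
+    try (S3Client probe = S3Client.builder().region(Region.EU_WEST_2).build()) {
+      probe.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
+      return Region.EU_WEST_2.id(); // success: bucket is in the probe region
+    } catch (S3Exception e) {
+      if (e.statusCode() == 301) {
+        // 301 Moved Permanently: the bucket's region is in a response header
+        return e.awsErrorDetails().sdkHttpResponse()
+            .firstMatchingHeader("x-amz-bucket-region")
+            .orElseThrow(() -> e);
+      }
+      throw e;
+    }
+  }
+}
+```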
+
+### List Object
+
+There is no way to paginate the listObject V1 result, so we are
+doing [this](https://github.com/ahmarsuhail/hadoop/pull/23/files#diff-4050f95b7e3912145415b6e2f9cd3b0760fcf2ce96bf0980c6c30a6edad2d0fbR2745)
+instead. We are trying to get pagination for listObject V1 into the SDK, but will have to use this
+workaround for now.
+
+### EncryptionSecretOperations
+
+Two new methods have been added, `getSSECustomerKey` and `getSSEAwsKMSKey`. Previously, SDK V1 had
+specific classes for these keys: `SSECustomerKey` and `SSEAwsKeyManagementParams`. There are no such
+classes in V2, and these values need to be set manually, so we now simply return the keys as
+strings and have to calculate and set their MD5s ourselves when building the request.
+
+### GetObjectMetadata
+
+* `RequestFactory.newGetObjectMetadataRequest` is now `RequestFactory.newHeadObjectRequestBuilder`.
+* In `HeaderProcessing.retrieveHeaders()`, called by `getXAttrs()`,
+  removed `maybeSetHeader(headers, XA_CONTENT_MD5, md.getContentMD5())`, as S3 doesn't ever actually
+  return an MD5 header, regardless of whether you set it during a putObject. It does return
+  an `etag`, which may or may not be an MD5 depending on certain conditions. `getContentMD5()` is
+  always empty, so there does not seem to be a need to set this header.
+* `RequestFactoryImpl.setOptionalGetObjectMetadataParameters`: the method has been removed and its
+  logic moved to `RequestFactoryImpl.newHeadObjectRequestBuilder()`.
+* `RequestFactoryImpl.generateSSECustomerKey()` has been removed; instead,
+  call `EncryptionSecretOperations.createSSECustomerKey` directly in `newHeadObjectRequestBuilder()`.
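+
+A minimal sketch (illustrative only, with a made-up helper name) of building a head request
+with SSE-C parameters, computing the key MD5 manually:
+
+```java
+import java.security.MessageDigest;
+import java.util.Base64;
+
+import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
+
+final class SseCExample {
+  /** Build a HeadObject request carrying SSE-C key material. */
+  static HeadObjectRequest headWithSseC(String bucket, String key,
+      String base64Key) throws Exception {
+    byte[] rawKey = Base64.getDecoder().decode(base64Key);
+    // the SDK no longer computes the key MD5 for us
+    String keyMd5 = Base64.getEncoder().encodeToString(
+        MessageDigest.getInstance("MD5").digest(rawKey));
+    return HeadObjectRequest.builder()
+        .bucket(bucket)
+        .key(key)
+        .sseCustomerAlgorithm("AES256")
+        .sseCustomerKey(base64Key)
+        .sseCustomerKeyMD5(keyMd5)
+        .build();
+  }
+}
+```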
+
+### PutObject
+
+* Previously, when creating the `putObjectRequest`, you would also give it the data to be uploaded,
+  so it would be of the form `PutObjectRequest(bucket, key, file/inputstream)`. This is no longer
+  the case. Instead, the data now needs to be passed in while making the `s3Client.putObject()`
+  call. For this reason, the data is now part of
+  the `S3AFileSystem.putObject(putObjectRequest, file, listener)`
+  and `S3AFileSystem.putObjectDirect(putObjectRequest, putOptions, uploadData, isFile)` calls.
+* `S3ADataBlocks`: need to make this class public as it's now used to pass in data
+  to `putObjectDirect()`, sometimes from outside the package (`MagicCommitTracker`,
+  `ITestS3AMiscOperations`).
+* `ProgressableProgressListener`: you can no longer pass in the `Upload` while initialising the
+  listener,
+  as in `ProgressableProgressListener listener = new ProgressableProgressListener(this, key, upload, progress);`
+  The upload is now only available after initialising the listener, since the listener needs to be
+  initialised during creation of the Transfer Manager upload. Previously, you could create the
+  listener after starting the TM upload, and attach it.
+* The `Upload` is now passed into the progress listener later,
+  in `listener.uploadCompleted(uploadInfo.getFileUpload());`.
+* `UploadInfo`: previously, since the data to be uploaded was part of `putObjectRequest`, the
+  transfer manager only returned a single `Upload` type, which could be used to track the upload.
+  Now, depending on the upload type (e.g. File or InputStream), it returns different types. This
+  class has been updated to return FileUpload info, as it's only ever used for file uploads
+  currently. It can be extended to store different transfer types in the future.
+* `WriteOperationHelper.createPutObjectRequest()`: previously the data to be uploaded was part
+  of `PutObjectRequest`, and so we required two methods to create the request: one for input streams
+  and one for files. Since the data to be uploaded is no longer part of the request, but instead an
+  argument to `putObject`, we only need one method now.
+* `WriteOperationHelper.newObjectMetadata()`: this method has been removed; standard metadata,
+  instead of being part of the `ObjectMetadata`, is now just added while building the request, for
+  example in `putObjectRequestBuilder.serverSideEncryption()`.
+* `RequestFactory`: similar to WriteOperationHelper, there is now a single putObjectRequest,
+  and `newObjectMetadata` has been removed. Instead, all standard metadata is now set in the new
+  method `buildPutObjectRequest`.
+* `RequestFactoryImpl.newObjectMetadata()`: previously, object metadata was created
+  using `newObjectMetadata()` and passed into the `newPutObjectRequest()` call. This method has been
+  removed; standard metadata is now added while building the request, in
+  `putObjectRequestBuilder.serverSideEncryption()`. Content length and content encoding previously
+  set in this method are now set in `buildPutObjectRequest()`, and SSE is set
+  in `putEncryptionParameters()`.
+* `RequestFactoryImpl.maybeSetMetadata()`: was a generic method to set user metadata on object
+  metadata. User metadata now gets set on the request builder, so the method has been removed.
+* `RequestFactoryImpl.setOptionalPutRequestParameters()`: method has been removed, and its logic
+  moved to `putEncryptionParameters()`.
+
+### CopyObject
+
+* `RequestFactoryImpl.buildPutObjectRequest`: destination metadata is no longer built
+  using `newObjectMetadata()` and is instead set on the request builder. The logic has a couple of
+  differences:
+  * content encoding is set in `buildCopyObjectRequest`;
+    the `if (contentEncoding != null && !isDirectoryMarker)` can just
+    be `if (contentEncoding != null)` for copy, as here `isDirectoryMarker` was always false.
+  * contentLength is not set, as this is a system-defined header, and is copied over automatically
+    by S3 during copy.
+* `HeaderProcessing.cloneObjectMetadata`: this was previously also setting a lot of system-defined
+  metadata, e.g. `setHttpExpiresDate` and `setLastModified`. These have been removed, as they are
+  set by S3 during the copy. We have tested this and can see they are set automatically regardless
+  of the metadataDirective (copy or replace).
+* `RequestFactoryImpl.copyEncryptionParameters()`: due to the changes
+  in `EncryptionSecretOperations`, source and destination encryption params have to be set manually.
+
+### MultipartUpload
+
+* `RequestFactoryImpl.newObjectMetadata()`: metadata is now set on the request builder. For MPU, only
+content encoding needs to be set, as per previous behaviour. Encryption params are set
+in `multipartUploadEncryptionParameters`.
+
+### GetObject
+
+* Previously, GetObject returned an `S3Object` response which exposed its content in a
+  `S3ObjectInputStream` through the `getObjectContent()` method. In SDK v2, the response is
+  directly a `ResponseInputStream<GetObjectResponse>` with the content, while the
+  `GetObjectResponse` instance can be retrieved by calling `response()` on it.
+* The above change simplifies managing the lifetime of the response input stream. In v1,
+  `S3AInputStream` had to keep a reference to the `S3Object` while holding the wrapped
+  `S3ObjectInputStream`. When upgraded to SDK v2, it can simply wrap the new
+  `ResponseInputStream<GetObjectResponse>`, which handles lifetime correctly. The same applies
+  to `SDKStreamDrainer`. Furthermore, the map in `S3ARemoteObject` associating input streams and
+  `S3Object` instances is no longer needed.
+* The range header on a `GetObject` request is now specified as a string, rather than a
+  `start`-`end` pair. `S3AUtils.formatRange` was introduced to format it.
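+
+A rough sketch of the new calling pattern (illustrative only; the file path and method name
+are made up for this example):
+
+```java
+import java.io.IOException;
+import java.nio.file.Paths;
+
+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectResponse;
+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
+
+final class PutGetSketch {
+  static void putThenGet(S3Client s3, String bucket, String key) throws IOException {
+    // data is now supplied at call time, not inside the request
+    s3.putObject(
+        PutObjectRequest.builder().bucket(bucket).key(key).build(),
+        RequestBody.fromFile(Paths.get("/tmp/source.dat")));
+
+    GetObjectRequest get = GetObjectRequest.builder()
+        .bucket(bucket).key(key)
+        .range("bytes=0-1023")   // the range is now a plain string header value
+        .build();
+    try (ResponseInputStream<GetObjectResponse> in = s3.getObject(get)) {
+      GetObjectResponse response = in.response();  // object metadata
+      // read the content from 'in' ...
+    }
+  }
+}
+```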
+
+### DeleteObject
+
+In SDK v1, bulk delete would throw a `com.amazonaws.services.s3.model.MultiObjectDeleteException`
+in case of partial failure. In v2, instead, it returns a `DeleteObjectsResponse` containing a
+list of errors. A new `MultiObjectDeleteException` class was introduced in
+`org.apache.hadoop.fs.s3a` and is thrown when appropriate to reproduce the previous behaviour.
+* `MultiObjectDeleteSupport.translateDeleteException` was moved into `MultiObjectDeleteException`.
+* `ObjectIdentifier` replaces `DeleteObjectsRequest.KeyVersion`.
+
+### Select
+
+In SDK v2, handling of select requests has changed significantly, since SelectObjectContent is
+only supported on the new async S3 client. In previous versions, the response to a
+SelectObjectContent request exposed the results in a `SelectRecordsInputStream`, which S3A
+could wrap in `SelectInputStream`. In v2, instead, the response needs to be handled by an object
+implementing `SelectObjectContentResponseHandler`, which can receive an async publisher of
+the "events" returned by the service (`SdkPublisher<SelectObjectContentEventStream>`).
+
+In order to adapt the new API in S3A, three new classes have been introduced in
+`org.apache.hadoop.fs.s3a.select`:
+
+* `SelectObjectContentHelper`: wraps the `selectObjectContent()` call, provides a custom
+  response handler to receive the response, and exposes a `SelectEventStreamPublisher`.
+* `SelectEventStreamPublisher`: a publisher of select event stream events, which handles the
+  future returned by the select call and wraps the original publisher. This class provides
+  a `toRecordsInputStream()` method which returns an input stream containing the results,
+  reproducing the behaviour of the old `SelectRecordsInputStream`.
+* `BlockingEnumeration`: an adapter which lazily requests new elements from the publisher and
+  exposes them through an `Enumeration` interface. Used in
+  `SelectEventStreamPublisher.toRecordsInputStream()` to adapt the event publisher into
+  an enumeration of input streams, eventually passed to a `SequenceInputStream`.
+  Note that the "lazy" behaviour means that new elements are requested only on `read()` calls on
+  the input stream.
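+
+For illustration, the three classes compose roughly as follows (a sketch; the logging and the
+wrapper class are made up for this example):
+
+```java
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.fs.Path;
+import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
+
+import static org.apache.hadoop.fs.s3a.WriteOperationHelper.WriteOperationHelperCallbacks;
+
+final class SelectSketch {
+  /** Issue a select call and return a stream of the matching records. */
+  static InputStream readSelect(WriteOperationHelperCallbacks callbacks,
+      Path path, SelectObjectContentRequest request) throws IOException {
+    SelectEventStreamPublisher publisher = SelectObjectContentHelper.select(
+        callbacks, path, request, "select");        // issue the async request
+    return publisher.toRecordsInputStream(
+        end -> System.out.println("select completed: " + path));
+  }
+}
+```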
+* All credential provider classes implemented in Hadoop now implement V2's `AwsCredentialsProvider`.
+* New adapter class `org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter` has been
+  added. This converts SDK V1 credential providers to SDK V2 ones which
+  implement `AwsCredentialsProvider`.
+* `AWSCredentialProviderList` also implements `AwsCredentialsProvider`, but keeps the existing
+  constructors and `add` methods for V1 credential providers, wrapping V1 credential providers in
+  the adapter. This means that custom binding classes in delegation tokens, as well as any custom
+  credential providers, will continue to work.
+* Added a new `getCredentials()` method in `AWSCredentialProviderList`, which ensures that custom
+  binding classes calling `AWSCredentialProviderList.getCredentials()` continue to work.
+* The following `fs.s3a.aws.credentials.provider` values are mapped as follows:
+
+|`fs.s3a.aws.credentials.provider` value |Mapped to |
+|--- |--- |
+|`com.amazonaws.auth.EnvironmentVariableCredentialsProvider` |`software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` |
+|`com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
+|`com.amazonaws.auth.InstanceProfileCredentialsProvider` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
+
+### Auditing
+
+The SDK v2 offers a new `ExecutionInterceptor`
+[interface](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)
+which broadly replaces the `RequestHandler2` abstract class from v1.
+Switching to the new mechanism in S3A brings:
+
+* Simplification in `AWSAuditEventCallbacks` (and implementors) which can now extend
+  `ExecutionInterceptor`
+* "Registering" a Span with a request has moved from `requestCreated` to `beforeExecution`
+  (where an `ExecutionAttributes` instance is first available)
+* The referrer header is built and added to the http request in `modifyHttpRequest`,
+  rather than in `beforeExecution`, where no http request is yet available
+* Dynamic loading of interceptors has been implemented to reproduce previous behaviour
+  with `RequestHandler2`s. The AWS SDK v2 offers an alternative mechanism, described
+  [here](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)
+  under "Interceptor Registration", which could make it redundant.
+
+In the Transfer Manager, `TransferListener` replaces `TransferStateChangeListener`. S3A code
+has been updated and `AuditManagerS3A` implementations now provide an instance of the former to
+switch to the active span, but registration of the new listeners is currently commented out because
+it causes an incompatibility issue with the internal logger, resulting in `NoSuchMethodError`s,
+at least in the current TransferManager Preview release.
+
+### Metric Collection
+
+`AwsStatisticsCollector` has been updated to implement the new `MetricPublisher` interface
+and collect the metrics from a `MetricCollection` object.
+The following table maps SDK v2 metrics to their equivalent in v1:
+
+| v2 Metrics | com.amazonaws.util.AWSRequestMetrics.Field | Comment |
+|-------------------------------------------------------------|---------------------------------------------|--------------------------------|
+| CoreMetric.RETRY_COUNT | HttpClientRetryCount | |
+| CoreMetric.RETRY_COUNT | RequestCount | always HttpClientRetryCount+1 |
+| HttpMetric.HTTP_STATUS_CODE with HttpStatusCode.THROTTLING | ThrottleException | to be confirmed |
+| CoreMetric.API_CALL_DURATION | ClientExecuteTime | |
+| CoreMetric.SERVICE_CALL_DURATION | HttpRequestTime | |
+| CoreMetric.MARSHALLING_DURATION | RequestMarshallTime | |
+| CoreMetric.SIGNING_DURATION | RequestSigningTime | |
+| CoreMetric.UNMARSHALLING_DURATION | ResponseProcessingTime | to be confirmed |
+
+Note that none of the timing metrics (`*_DURATION`) are currently collected in S3A.
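+
+For illustration, a publisher of this kind is attached to the v2 client's override configuration
+(a sketch; the wrapper class and method are made up, and the real wiring lives in the client
+factory):
+
+```java
+import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
+import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
+import software.amazon.awssdk.services.s3.S3Client;
+
+final class MetricsWiring {
+  /** Build an S3 client which publishes SDK metrics into S3A statistics. */
+  static S3Client buildClient(StatisticsFromAwsSdk stats) {
+    return S3Client.builder()
+        .overrideConfiguration(o -> o.addMetricPublisher(
+            new AwsStatisticsCollector(stats)))
+        .build();
+  }
+}
+```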
+
+### Exception Handling
+
+The code to handle exceptions thrown by the SDK has been updated to reflect the changes in v2:
+
+* `com.amazonaws.SdkBaseException` and `com.amazonaws.AmazonClientException` changes:
+  * These classes have been combined and replaced with
+    `software.amazon.awssdk.core.exception.SdkException`.
+* `com.amazonaws.SdkClientException` changes:
+  * This class has been replaced with `software.amazon.awssdk.core.exception.SdkClientException`.
+  * The new class extends `software.amazon.awssdk.core.exception.SdkException`.
+* `com.amazonaws.AmazonServiceException` changes:
+  * This class has been replaced with
+    `software.amazon.awssdk.awscore.exception.AwsServiceException`.
+  * The new class extends `software.amazon.awssdk.core.exception.SdkServiceException`,
+    a new exception type which itself extends `software.amazon.awssdk.core.exception.SdkException`.
+
+See also the
+[SDK changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#3-exception-changes).
+
+
+### Failure Injection
+
+With SDK v1, failure injection was implemented in `InconsistentAmazonS3Client`,
+which extended the S3 client. In SDK v2, reproducing this approach would not be straightforward,
+since the default S3 client is an internal final class. Instead, the same fault injection strategy
+is now performed by a `FailureInjectionInterceptor` (see
+[ExecutionInterceptor](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html))
+registered on the default client by `InconsistentS3ClientFactory`.
+`InconsistentAmazonS3Client` has been removed. No changes to the user configuration are required.
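+
+As a rough illustration of the interceptor-based approach (a minimal sketch, not the actual
+`FailureInjectionInterceptor`; the class name and failure policy are illustrative), a
+fault-injecting interceptor can simply throw before the request is executed:
+
+```java
+import java.util.concurrent.ThreadLocalRandom;
+
+import software.amazon.awssdk.awscore.exception.AwsServiceException;
+import software.amazon.awssdk.core.interceptor.Context;
+import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
+import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
+
+/** Fails a fraction of requests with a 503, simulating throttling. */
+public class ThrottlingInterceptor implements ExecutionInterceptor {
+
+  private final double failureProbability;
+
+  public ThrottlingInterceptor(double failureProbability) {
+    this.failureProbability = failureProbability;
+  }
+
+  @Override
+  public void beforeExecution(Context.BeforeExecution context,
+      ExecutionAttributes executionAttributes) {
+    if (ThreadLocalRandom.current().nextDouble() < failureProbability) {
+      throw AwsServiceException.builder()
+          .statusCode(503)
+          .message("throttled (injected failure)")
+          .build();
+    }
+  }
+}
+```
+
+Such an interceptor would be registered on the client builder via
+`ClientOverrideConfiguration.Builder#addExecutionInterceptor()`.
+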
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index d2ed9ede0171c..bc5bc64f86327 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -1004,20 +1004,18 @@ using an absolute XInclude reference to it.
 **Warning do not enable any type of failure injection in production. The
 following settings are for testing only.**
 
-One of the challenges with S3A integration tests was the fact that S3 was an
-eventually-consistent storage system. To simulate inconsistencies more
-frequently than they would normally surface, S3A supports a shim layer on top of the `AmazonS3Client`
-class which artificially delays certain paths from appearing in listings.
-This is implemented in the class `InconsistentAmazonS3Client`.
+S3A provides an "Inconsistent S3 Client Factory" that can be used to
+simulate throttling by injecting random failures on S3 client requests.
 
-Now that S3 is consistent, injecting inconsistency is no longer needed
-during testing.
-However, it is stil useful to use the other feature of the client:
-throttling simulation.
-## Simulating List Inconsistencies
+**Note**
 
-### Enabling the InconsistentAmazonS3CClient
+In previous releases, this factory could also be used to simulate
+inconsistencies during testing of S3Guard. Now that S3 is consistent,
+injecting inconsistency is no longer needed during testing.
+
+
+### Enabling the InconsistentS3ClientFactory
 
 To enable the fault-injecting client via configuration, switch the
@@ -1047,7 +1045,7 @@
 These exceptions are returned to S3; they do not test the AWS SDK retry logic.
 
-### Using the `InconsistentAmazonS3CClient` in downstream integration tests
+### Using the `InconsistentS3ClientFactory` in downstream integration tests
 
 The inconsistent client is shipped in the `hadoop-aws` JAR, so it can be used in
 integration tests.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
index a46303f339678..c08108f096458 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
@@ -20,9 +20,6 @@
 import static org.apache.hadoop.fs.s3a.Constants.*;
 
-import com.amazonaws.AmazonServiceException;
-import com.amazonaws.services.s3.AmazonS3;
-
 import java.net.URI;
 
 import org.apache.hadoop.conf.Configuration;
@@ -32,6 +29,10 @@
 import org.junit.Rule;
 import org.junit.rules.ExpectedException;
 
+import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
+import software.amazon.awssdk.awscore.exception.AwsServiceException;
+import software.amazon.awssdk.services.s3.S3Client;
+
 /**
  * Abstract base class for S3A unit tests using a mock S3 client and a null
  * metadata store.
@@ -39,17 +40,20 @@
 public abstract class AbstractS3AMockTest {
 
   protected static final String BUCKET = "mock-bucket";
-  protected static final AmazonServiceException NOT_FOUND;
-  static {
-    NOT_FOUND = new AmazonServiceException("Not Found");
-    NOT_FOUND.setStatusCode(404);
-  }
+  protected static final AwsServiceException NOT_FOUND =
+      AwsServiceException.builder()
+          .message("Not Found")
+          .statusCode(404)
+          .awsErrorDetails(AwsErrorDetails.builder()
+              .errorCode("")
+              .build())
+          .build();
 
   @Rule
   public ExpectedException exception = ExpectedException.none();
 
   protected S3AFileSystem fs;
-  protected AmazonS3 s3;
+  protected S3Client s3V2;
 
   @Before
   public void setup() throws Exception {
@@ -59,10 +63,9 @@ public void setup() throws Exception {
     // unset S3CSE property from config to avoid pathIOE.
conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); - s3 = fs.getAmazonS3ClientForTesting("mocking"); + s3V2 = fs.getAmazonS3V2ClientForTesting("mocking"); } - @SuppressWarnings("deprecation") public Configuration createConfiguration() { Configuration conf = new Configuration(); conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class, @@ -78,6 +81,10 @@ public Configuration createConfiguration() { return conf; } + public S3Client getS3Client() { + return s3V2; + } + @After public void teardown() throws Exception { if (fs != null) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java index 4013e9db29a3e..794480d4409fb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java @@ -20,7 +20,7 @@ import java.io.IOException; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.net.util.Base64; @@ -69,33 +69,33 @@ public static void assertEncrypted(S3AFileSystem fs, final S3AEncryptionMethods algorithm, final String kmsKeyArn) throws IOException { - ObjectMetadata md = fs.getObjectMetadata(path); + HeadObjectResponse md = fs.getObjectMetadata(path); String details = String.format( "file %s with encryption algorithm %s and key %s", path, - md.getSSEAlgorithm(), - md.getSSEAwsKmsKeyId()); + md.serverSideEncryptionAsString(), + md.ssekmsKeyId()); switch(algorithm) { case SSE_C: assertNull("Metadata algorithm should have been null in " + details, - md.getSSEAlgorithm()); + md.serverSideEncryptionAsString()); assertEquals("Wrong SSE-C algorithm in " + details, - SSE_C_ALGORITHM, md.getSSECustomerAlgorithm()); + SSE_C_ALGORITHM, md.sseCustomerAlgorithm()); String md5Key = convertKeyToMd5(fs); assertEquals("getSSECustomerKeyMd5() wrong in " + details, - md5Key, md.getSSECustomerKeyMd5()); + md5Key, md.sseCustomerKeyMD5()); break; case SSE_KMS: assertEquals("Wrong algorithm in " + details, - AWS_KMS_SSE_ALGORITHM, md.getSSEAlgorithm()); + AWS_KMS_SSE_ALGORITHM, md.serverSideEncryptionAsString()); assertEquals("Wrong KMS key in " + details, kmsKeyArn, - md.getSSEAwsKmsKeyId()); + md.ssekmsKeyId()); break; default: - assertEquals("AES256", md.getSSEAlgorithm()); + assertEquals("AES256", md.serverSideEncryptionAsString()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index c13c3f48b8466..9e58dba5c9b28 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -32,11 +32,11 @@ import org.junit.Test; import org.junit.rules.Timeout; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import 
static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getCSVTestPath; @@ -77,20 +77,17 @@ public void testBadConfiguration() throws IOException { * or a public default constructor. */ static class BadCredentialsProviderConstructor - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { @SuppressWarnings("unused") public BadCredentialsProviderConstructor(String fsUri, Configuration conf) { } @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("dummy_key", "dummy_secret"); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("dummy_key", "dummy_secret"); } - @Override - public void refresh() { - } } @Test @@ -125,20 +122,17 @@ private void createFailingFS(Configuration conf) throws IOException { fail("Expected exception - got " + fs); } - static class BadCredentialsProvider implements AWSCredentialsProvider { + static class BadCredentialsProvider implements AwsCredentialsProvider { @SuppressWarnings("unused") public BadCredentialsProvider(Configuration conf) { } @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials("bad_key", "bad_secret"); + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("bad_key", "bad_secret"); } - @Override - public void refresh() { - } } @Test @@ -157,7 +151,6 @@ public void testBadCredentials() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testAnonymousProvider() throws Exception { Configuration conf = new Configuration(); conf.set(AWS_CREDENTIALS_PROVIDER, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index 1071582cc67d2..cf7d822095801 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -20,15 +20,17 @@ import java.util.List; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.AccessControlList; -import com.amazonaws.services.s3.model.Grant; -import com.amazonaws.services.s3.model.GroupGrantee; -import com.amazonaws.services.s3.model.Permission; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectAclRequest; +import software.amazon.awssdk.services.s3.model.GetObjectAclResponse; +import software.amazon.awssdk.services.s3.model.Grant; +import software.amazon.awssdk.services.s3.model.Grantee; +import software.amazon.awssdk.services.s3.model.Permission; +import software.amazon.awssdk.services.s3.model.Type; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -55,7 +57,8 @@ protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, CANNED_ACL); - + // TODO: Check why we need this ACL? V2 does not have a LOG_DELIVERY_WRITE ACL which causes + // this test to fail. 
conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); // needed because of direct calls made conf.setBoolean(S3AAuditConstants.REJECT_OUT_OF_SPAN_OPERATIONS, false); @@ -89,18 +92,28 @@ private void assertObjectHasLoggingGrant(Path path, boolean isFile) { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("acls"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("acls"); String key = storeContext.pathToKey(path); if (!isFile) { key = key + "/"; } - AccessControlList acl = s3.getObjectAcl(storeContext.getBucket(), - key); - List grants = acl.getGrantsAsList(); + GetObjectAclResponse acl = s3.getObjectAcl(GetObjectAclRequest.builder() + .bucket(storeContext.getBucket()) + .key(key) + .build()); + List grants = acl.grants(); for (Grant grant : grants) { LOG.info("{}", grant.toString()); } - Grant loggingGrant = new Grant(GroupGrantee.LogDelivery, Permission.Write); + // TODO: Review whether this test is required in v2. + // Reproduces v1's GroupGrantee.LogDelivery + Grant loggingGrant = Grant.builder() + .grantee(Grantee.builder() + .type(Type.GROUP) + .uri("http://acs.amazonaws.com/groups/s3/LogDelivery") + .build()) + .permission(Permission.WRITE) + .build(); Assertions.assertThat(grants) .describedAs("ACL grants of object %s", path) .contains(loggingGrant); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 26d00bc7d359a..bc7b2ce3d1b06 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -19,9 +19,6 @@ package org.apache.hadoop.fs.s3a; import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.S3ClientOptions; - import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.reflect.FieldUtils; @@ -31,6 +28,8 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.test.GenericTestUtils; + +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -51,6 +50,11 @@ import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; +import software.amazon.awssdk.core.client.config.SdkClientConfiguration; +import software.amazon.awssdk.core.client.config.SdkClientOption; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; @@ -109,7 +113,7 @@ public void testEndpoint() throws Exception { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("test endpoint"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("test endpoint"); String endPointRegion = ""; // Differentiate handling of "s3-" and "s3." based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -120,8 +124,11 @@ public void testEndpoint() throws Exception { } else { fail("Unexpected endpoint"); } + // TODO: review way to get the bucket region. 
+ String region = s3.getBucketLocation(b -> b.bucket(fs.getUri().getHost())) + .locationConstraintAsString(); assertEquals("Endpoint config setting and bucket location differ: ", - endPointRegion, s3.getBucketLocation(fs.getUri().getHost())); + endPointRegion, region); } } @@ -346,22 +353,25 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() try { fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("configuration"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("configuration"); assertNotNull(s3); - S3ClientOptions clientOptions = getField(s3, S3ClientOptions.class, - "clientOptions"); + + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, + "clientConfiguration"); + S3Configuration s3Configuration = + (S3Configuration)clientConfiguration.option(SdkClientOption.SERVICE_CONFIGURATION); assertTrue("Expected to find path style access to be switched on!", - clientOptions.isPathStyleAccess()); + s3Configuration.pathStyleAccessEnabled()); byte[] file = ContractTestUtils.toAsciiByteArray("test file"); ContractTestUtils.writeAndRead(fs, new Path("/path/style/access/testFile"), file, file.length, (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true); - } catch (final AWSS3IOException e) { + } catch (final AWSRedirectException e) { LOG.error("Caught exception: ", e); // Catch/pass standard path style access behaviour when live bucket // isn't in the same region as the s3 client default. See // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html - assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode()); + assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.statusCode()); } catch (final IllegalArgumentException e) { // Path style addressing does not work with AP ARNs if (!fs.getBucket().contains("arn:")) { @@ -378,12 +388,13 @@ public void testDefaultUserAgent() throws Exception { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("User Agent"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("User Agent"); assertNotNull(s3); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); - assertEquals("Hadoop " + VersionInfo.getVersion(), - awsConf.getUserAgentPrefix()); + Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) + .describedAs("User Agent prefix") + .startsWith("Hadoop " + VersionInfo.getVersion()); } @Test @@ -392,12 +403,13 @@ public void testCustomUserAgent() throws Exception { conf.set(Constants.USER_AGENT_PREFIX, "MyApp"); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("User agent"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("User agent"); assertNotNull(s3); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); - assertEquals("MyApp, Hadoop " + VersionInfo.getVersion(), - awsConf.getUserAgentPrefix()); + Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) + .describedAs("User Agent prefix") + .startsWith("MyApp, Hadoop " + VersionInfo.getVersion()); } @Test @@ -405,16 +417,16 @@ public void testRequestTimeout() throws Exception { conf = new 
Configuration(); conf.set(REQUEST_TIMEOUT, "120"); fs = S3ATestUtils.createTestFileSystem(conf); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)"); - ClientConfiguration awsConf = getField(s3, ClientConfiguration.class, + S3Client s3 = fs.getAmazonS3V2ClientForTesting("Request timeout (ms)"); + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); assertEquals("Configured " + REQUEST_TIMEOUT + " is different than what AWS sdk configuration uses internally", - 120000, awsConf.getRequestTimeout()); + 120000, + clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT).toMillis()); } @Test - @SuppressWarnings("deprecation") public void testCloseIdempotent() throws Throwable { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); @@ -522,6 +534,8 @@ public void testS3SpecificSignerOverride() throws IOException { // Default SIGNING_ALGORITHM, overridden for S3 only config = new Configuration(); config.set(SIGNING_ALGORITHM_S3, s3SignerOverride); + + // TODO: update during signer work. clientConfiguration = S3AUtils .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_S3); Assert.assertEquals(s3SignerOverride, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java index 68ab5bd9e8c19..3a72206641452 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java @@ -20,7 +20,7 @@ import java.io.IOException; -import com.amazonaws.services.s3.model.ObjectMetadata; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -51,9 +51,9 @@ protected S3AEncryptionMethods getSSEAlgorithm() { @Override protected void assertEncrypted(Path path) throws IOException { - ObjectMetadata md = getFileSystem().getObjectMetadata(path); + HeadObjectResponse md = getFileSystem().getObjectMetadata(path); assertEquals("SSE Algorithm", EncryptionTestUtils.AWS_KMS_SSE_ALGORITHM, - md.getSSEAlgorithm()); - assertThat(md.getSSEAwsKmsKeyId(), containsString("arn:aws:kms:")); + md.serverSideEncryptionAsString()); + assertThat(md.ssekmsKeyId(), containsString("arn:aws:kms:")); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java index 56ce9300dc4e7..95ceae608e17b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java @@ -118,7 +118,7 @@ public void testEncryptionOverRename() throws Throwable { S3AFileSystem fs = getFileSystem(); Path path = path(getMethodName() + "find-encryption-algo"); ContractTestUtils.touch(fs, path); - String sseAlgorithm = fs.getObjectMetadata(path).getSSEAlgorithm(); + String sseAlgorithm = fs.getObjectMetadata(path).serverSideEncryptionAsString(); if(StringUtils.isBlank(sseAlgorithm) || !sseAlgorithm.equals(AWS_KMS_SSE_ALGORITHM)) { skip("Test bucket is not configured with " + AWS_KMS_SSE_ALGORITHM); diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java index c0f6a4b23226b..48f3fdf91d323 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; - import org.assertj.core.api.Assertions; import org.junit.Assume; @@ -34,10 +32,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.S3Error; + import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.nio.file.AccessDeniedException; +import java.util.stream.Collectors; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.createFiles; @@ -115,12 +117,12 @@ private void removeKeys(S3AFileSystem fileSystem, String... keys) } } - private List buildDeleteRequest( + private List buildDeleteRequest( final String[] keys) { - List request = new ArrayList<>( + List request = new ArrayList<>( keys.length); for (String key : keys) { - request.add(new DeleteObjectsRequest.KeyVersion(key)); + request.add(ObjectIdentifier.builder().key(key).build()); } return request; } @@ -156,12 +158,26 @@ public void testMultiObjectDeleteNoPermissions() throws Throwable { // create a span, expect it to be activated. fs.getAuditSpanSource().createSpan(StoreStatisticNames.OP_DELETE, csvPath.toString(), null); - List keys + List keys = buildDeleteRequest( new String[]{ fs.pathToKey(csvPath), "missing-key.csv" }); + MultiObjectDeleteException ex = intercept( + MultiObjectDeleteException.class, + () -> fs.removeKeys(keys, false)); + final List undeleted = ex.errors().stream() + .map(S3Error::key) + .map(fs::keyToQualifiedPath) + .collect(Collectors.toList()); + final String undeletedFiles = undeleted.stream() + .map(Path::toString) + .collect(Collectors.joining(", ")); + failIf(undeleted.size() != 2, + "undeleted list size wrong: " + undeletedFiles, + ex); + assertTrue("no CSV in " +undeletedFiles, undeleted.contains(csvPath)); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 1a944ec29968a..1bf874b103716 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -25,19 +25,22 @@ import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.GetBucketEncryptionResult; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; import org.assertj.core.api.Assertions; import org.junit.Assume; import org.junit.Test; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetBucketEncryptionRequest; +import software.amazon.awssdk.services.s3.model.GetBucketEncryptionResponse; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; + import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; +import org.apache.hadoop.fs.s3a.impl.RequestFactoryImpl; import org.apache.hadoop.fs.store.audit.AuditSpan; import org.apache.hadoop.fs.store.EtagChecksum; import org.apache.hadoop.test.LambdaTestUtils; @@ -106,15 +109,15 @@ public void testCreateNonRecursiveSuccess() throws IOException { public void testPutObjectDirect() throws Throwable { final S3AFileSystem fs = getFileSystem(); try (AuditSpan span = span()) { - ObjectMetadata metadata = fs.newObjectMetadata(-1); - metadata.setContentLength(-1); + RequestFactory factory = RequestFactoryImpl.builder().withBucket(fs.getBucket()).build(); Path path = path("putDirect"); - final PutObjectRequest put = new PutObjectRequest(fs.getBucket(), - path.toUri().getPath(), - new ByteArrayInputStream("PUT".getBytes()), - metadata); + PutObjectRequest.Builder putObjectRequestBuilder = + factory.newPutObjectRequestBuilder(path.toUri().getPath(), null, -1, false); + putObjectRequestBuilder.contentLength(-1L); LambdaTestUtils.intercept(IllegalStateException.class, - () -> fs.putObjectDirect(put, PutObjectOptions.keepingDirs(), null)); + () -> fs.putObjectDirect(putObjectRequestBuilder.build(), PutObjectOptions.keepingDirs(), + new S3ADataBlocks.BlockUploadData(new ByteArrayInputStream("PUT".getBytes())), + false, null)); assertPathDoesNotExist("put object was created", path); } } @@ -406,13 +409,15 @@ private static T verifyNoTrailingSlash(String role, T o) { * Gets default encryption settings for the bucket or returns null if default * encryption is disabled. 
*/ - private GetBucketEncryptionResult getDefaultEncryption() throws IOException { + private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("check default encryption"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("check default encryption"); try { return Invoker.once("getBucketEncryption()", fs.getBucket(), - () -> s3.getBucketEncryption(fs.getBucket())); + () -> s3.getBucketEncryption(GetBucketEncryptionRequest.builder() + .bucket(fs.getBucket()) + .build())); } catch (FileNotFoundException e) { return null; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java index 818d2fc889c17..5dc38344de080 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java @@ -18,13 +18,14 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.MultipartUpload; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.store.audit.AuditSpan; import org.junit.Test; +import software.amazon.awssdk.services.s3.model.MultipartUpload; + import java.io.IOException; import java.util.HashSet; import java.util.Set; @@ -114,7 +115,7 @@ private void assertUploadsPresent(MultipartUtils.UploadIterator list, } private MultipartTestUtils.IdKey toIdKey(MultipartUpload mu) { - return new MultipartTestUtils.IdKey(mu.getKey(), mu.getUploadId()); + return new MultipartTestUtils.IdKey(mu.key(), mu.uploadId()); } private Path getPartFilename(int index) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java index d3925d35a99d3..5b6ea46cd8a0a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java @@ -107,7 +107,7 @@ public void testRequesterPaysDisabledFails() throws Throwable { try (FileSystem fs = requesterPaysPath.getFileSystem(conf)) { intercept( AccessDeniedException.class, - "403 Forbidden", + "403", "Expected requester pays bucket to fail without header set", () -> fs.open(requesterPaysPath).close() ); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java index 7c56f8d2ea050..6ccb7ac26040b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java @@ -37,7 +37,6 @@ import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER; import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_ARRAY; import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BUFFER_DISK; -import static org.apache.hadoop.fs.s3a.Constants.FAST_UPLOAD_BYTEBUFFER; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER; import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_REDUCED_REDUNDANCY; diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java index 0778662542d88..a77e0ac705854 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java @@ -25,14 +25,13 @@ import java.time.OffsetDateTime; import java.util.concurrent.TimeUnit; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.amazonaws.services.securitytoken.model.Credentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding; @@ -71,7 +70,6 @@ public class ITestS3ATemporaryCredentials extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(ITestS3ATemporaryCredentials.class); - @SuppressWarnings("deprecation") private static final String TEMPORARY_AWS_CREDENTIALS = TemporaryAWSCredentialsProvider.NAME; @@ -120,7 +118,7 @@ public void testSTS() throws IOException { credentials = testFS.shareCredentials("testSTS"); String bucket = testFS.getBucket(); - AWSSecurityTokenServiceClientBuilder builder = STSClientFactory.builder( + StsClientBuilder builder = STSClientFactory.builder( conf, bucket, credentials, @@ -154,7 +152,7 @@ public void testSTS() throws IOException { // now create an invalid set of credentials by changing the session // token - conf2.set(SESSION_TOKEN, "invalid-" + sessionCreds.getSessionToken()); + conf2.set(SESSION_TOKEN, "invalid-" + sessionCreds.sessionToken()); try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf2)) { createAndVerifyFile(fs, path("testSTSInvalidToken"), TEST_FILE_SIZE); fail("Expected an access exception, but file access to " @@ -176,14 +174,13 @@ protected String getStsRegion(final Configuration conf) { } @Test - @SuppressWarnings("deprecation") public void testTemporaryCredentialValidation() throws Throwable { Configuration conf = new Configuration(); conf.set(ACCESS_KEY, "accesskey"); conf.set(SECRET_KEY, "secretkey"); conf.set(SESSION_TOKEN, ""); LambdaTestUtils.intercept(CredentialInitializationException.class, - () -> new TemporaryAWSCredentialsProvider(conf).getCredentials()); + () -> new TemporaryAWSCredentialsProvider(conf).resolveCredentials()); } /** @@ -360,7 +357,6 @@ public void testSessionCredentialsEndpointNoRegion() throws Throwable { * @return the caught exception. * @throws Exception any unexpected exception. 
*/ - @SuppressWarnings("deprecation") public E expectedSessionRequestFailure( final Class clazz, final String endpoint, @@ -370,15 +366,15 @@ public E expectedSessionRequestFailure( getFileSystem().shareCredentials("test"); DurationInfo ignored = new DurationInfo(LOG, "requesting credentials")) { Configuration conf = new Configuration(getContract().getConf()); - ClientConfiguration awsConf = - S3AUtils.createAwsConf(conf, null, AWS_SERVICE_IDENTIFIER_STS); + return intercept(clazz, exceptionText, () -> { - AWSSecurityTokenService tokenService = + StsClient tokenService = STSClientFactory.builder(parentCreds, - awsConf, + conf, endpoint, - region) + region, + getFileSystem().getBucket()) .build(); Invoker invoker = new Invoker(new S3ARetryPolicy(conf), LOG_AT_ERROR); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 40857373fb808..5f913f437f553 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -21,9 +21,6 @@ import java.io.IOException; import java.net.URI; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.AmazonS3; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,6 +48,10 @@ import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.util.Progressable; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.S3Client; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditor; import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTrackerFactory; import static org.apache.hadoop.util.Preconditions.checkNotNull; @@ -116,9 +117,7 @@ public MockS3AFileSystem(S3AFileSystem mock, root = new Path(FS_URI.toString()); } - private static T prepareRequest(T t) { - return t; - } + private static void prepareRequest(SdkRequest.Builder t) {} @Override public RequestFactory getRequestFactory() { @@ -210,7 +209,7 @@ public boolean isMultipartUploadEnabled() { * @param client client. 
*/ @Override - public void setAmazonS3Client(AmazonS3 client) { + public void setAmazonS3Client(S3Client client) { LOG.debug("Setting S3 client to {}", client); super.setAmazonS3Client(client); } @@ -353,13 +352,13 @@ public long getDefaultBlockSize() { void deleteObjectAtPath(Path f, String key, boolean isFile) - throws AmazonClientException, IOException { + throws SdkException, IOException { deleteObject(key); } @Override protected void maybeCreateFakeParentDirectory(Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { // no-op } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 3240309aef971..ea5544930d211 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -26,14 +26,16 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.MultipartUploadListing; import com.amazonaws.services.s3.model.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; /** * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3} * interface suitable for unit testing. */ -@SuppressWarnings("deprecation") public class MockS3ClientFactory implements S3ClientFactory { + // TODO: This will be removed when we remove this method for the client factory. @Override public AmazonS3 createS3Client(URI uri, final S3ClientCreationParameters parameters) { @@ -45,10 +47,21 @@ public AmazonS3 createS3Client(URI uri, // return a stub value MultipartUploadListing noUploads = new MultipartUploadListing(); noUploads.setMultipartUploads(new ArrayList<>(0)); - when(s3.listMultipartUploads(any())) - .thenReturn(noUploads); - when(s3.getBucketLocation(anyString())) - .thenReturn(Region.US_West.toString()); + when(s3.listMultipartUploads(any())).thenReturn(noUploads); + when(s3.getBucketLocation(anyString())).thenReturn(Region.US_West.toString()); + return s3; + } + + //TODO: This is incomplete, add in mocks as we update operations + @Override + public S3Client createS3ClientV2(URI uri, final S3ClientCreationParameters parameters) { + S3Client s3 = mock(S3Client.class); + return s3; + } + + @Override + public S3AsyncClient createS3AsyncClient(URI uri, final S3ClientCreationParameters parameters) { + S3AsyncClient s3 = mock(S3AsyncClient.class); return s3; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java index 1ddff3c4cd5e9..8f1cca4ebf607 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java @@ -18,9 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -30,9 +27,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.sync.RequestBody; +import 
software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Date; import java.util.List; import java.util.Objects; import java.util.Set; @@ -80,10 +83,11 @@ public static IdKey createPartUpload(S3AFileSystem fs, String key, int len, byte[] data = dataset(len, 'a', 'z'); InputStream in = new ByteArrayInputStream(data); String uploadId = writeHelper.initiateMultiPartUpload(key, PutObjectOptions.keepingDirs()); - UploadPartRequest req = writeHelper.newUploadPartRequest(key, uploadId, - partNo, len, in, null, 0L); - PartETag partEtag = writeHelper.uploadPart(req, null).getPartETag(); - LOG.debug("uploaded part etag {}, upid {}", partEtag.getETag(), uploadId); + UploadPartRequest req = writeHelper.newUploadPartRequestBuilder(key, uploadId, + partNo, len).build(); + RequestBody body = RequestBody.fromInputStream(in, len); + UploadPartResponse response = writeHelper.uploadPart(req, body, null); + LOG.debug("uploaded part etag {}, upid {}", response.eTag(), uploadId); return new IdKey(key, uploadId); } } @@ -99,10 +103,10 @@ public static void clearAnyUploads(S3AFileSystem fs, Path path) { = fs.getWriteOperationHelper(); while (uploads.hasNext()) { MultipartUpload upload = uploads.next(); - LOG.debug("Cleaning up upload: {} {}", upload.getKey(), - truncatedUploadId(upload.getUploadId())); - helper.abortMultipartUpload(upload.getKey(), - upload.getUploadId(), true, LOG_EVENT); + LOG.debug("Cleaning up upload: {} {}", upload.key(), + truncatedUploadId(upload.uploadId())); + helper.abortMultipartUpload(upload.key(), + upload.uploadId(), true, LOG_EVENT); } } catch (IOException ioe) { LOG.info("Ignoring exception: ", ioe); @@ -118,8 +122,8 @@ public static void assertNoUploadsAt(S3AFileSystem fs, Path path) throws MultipartUtils.UploadIterator uploads = fs.listUploads(key); while (uploads.hasNext()) { MultipartUpload upload = uploads.next(); - Assert.fail("Found unexpected upload " + upload.getKey() + " " + - truncatedUploadId(upload.getUploadId())); + Assert.fail("Found unexpected upload " + upload.key() + " " + + truncatedUploadId(upload.uploadId())); } } @@ -149,9 +153,9 @@ public static List listMultipartUploads(S3AFileSystem fs, return fs .listMultipartUploads(prefix).stream() .map(upload -> String.format("Upload to %s with ID %s; initiated %s", - upload.getKey(), - upload.getUploadId(), - S3ATestUtils.LISTING_FORMAT.format(upload.getInitiated()))) + upload.key(), + upload.uploadId(), + S3ATestUtils.LISTING_FORMAT.format(Date.from(upload.initiated())))) .collect(Collectors.toList()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 469562f9b33b9..8fde6395d594f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -61,12 +61,12 @@ import org.apache.hadoop.util.functional.CallableRaisingIOE; import org.apache.hadoop.util.functional.FutureIO; -import com.amazonaws.auth.AWSCredentialsProvider; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Assume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import 
software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import java.io.Closeable; import java.io.File; @@ -612,8 +612,7 @@ public static void unsetHadoopCredentialProviders(final Configuration conf) { * @return a set of credentials * @throws IOException on a failure */ - @SuppressWarnings("deprecation") - public static AWSCredentialsProvider buildAwsCredentialsProvider( + public static AwsCredentialsProvider buildAwsCredentialsProvider( final Configuration conf) throws IOException { assumeSessionTestsEnabled(conf); @@ -668,13 +667,14 @@ public static MarshalledCredentials requestSessionCredentials( MarshalledCredentials sc = MarshalledCredentialBinding .requestSessionCredentials( buildAwsCredentialsProvider(conf), - S3AUtils.createAwsConf(conf, bucket, AWS_SERVICE_IDENTIFIER_STS), + conf, conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT, DEFAULT_ASSUMED_ROLE_STS_ENDPOINT), conf.getTrimmed(ASSUMED_ROLE_STS_ENDPOINT_REGION, ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT), duration, - new Invoker(new S3ARetryPolicy(conf), Invoker.LOG_EVENT)); + new Invoker(new S3ARetryPolicy(conf), Invoker.LOG_EVENT), + bucket ); sc.validate("requested session credentials: ", MarshalledCredentials.CredentialTypeRequired.SessionOnly); return sc; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 36381bf14b169..e8bcd30b5f160 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -18,12 +18,13 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.regions.Regions; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; + import org.apache.hadoop.test.HadoopTestBase; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -47,9 +48,9 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String accessPoint = "testAp"; String[][] regionPartitionEndpoints = new String[][] { - {Regions.EU_WEST_1.getName(), "aws"}, - {Regions.US_GOV_EAST_1.getName(), "aws-us-gov"}, - {Regions.CN_NORTH_1.getName(), "aws-cn"}, + {Region.EU_WEST_1.id(), "aws"}, + {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, + {Region.CN_NORTH_1.id(), "aws-cn"}, }; for (String[] testPair : regionPartitionEndpoints) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java index 35199f4092790..ab20762674521 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java @@ -22,14 +22,10 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.net.SocketTimeoutException; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.AmazonS3Exception; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -38,6 +34,11 @@ import org.apache.hadoop.io.retry.RetryPolicy; import 
org.apache.hadoop.net.ConnectTimeoutException; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.S3Exception; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Invoker.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.verifyExceptionClass; @@ -98,9 +99,11 @@ public class TestInvoker extends Assert { private int retryCount; private Invoker invoker = new Invoker(RETRY_POLICY, (text, e, retries, idempotent) -> retryCount++); - private static final AmazonClientException CLIENT_TIMEOUT_EXCEPTION = - new AmazonClientException(new Local.ConnectTimeoutException("timeout")); - private static final AmazonServiceException BAD_REQUEST = serviceException( + private static final SdkException CLIENT_TIMEOUT_EXCEPTION = + SdkException.builder() + .cause(new Local.ConnectTimeoutException("timeout")) + .build(); + private static final AwsServiceException BAD_REQUEST = serviceException( AWSBadRequestException.STATUS_CODE, "bad request"); @@ -109,24 +112,26 @@ public void setup() { resetCounters(); } - private static AmazonServiceException serviceException(int code, + private static AwsServiceException serviceException(int code, String text) { - AmazonServiceException ex = new AmazonServiceException(text); - ex.setStatusCode(code); - return ex; + return AwsServiceException.builder() + .message(text) + .statusCode(code) + .build(); } - private static AmazonS3Exception createS3Exception(int code) { + private static S3Exception createS3Exception(int code) { return createS3Exception(code, "", null); } - private static AmazonS3Exception createS3Exception(int code, + private static S3Exception createS3Exception(int code, String message, Throwable inner) { - AmazonS3Exception ex = new AmazonS3Exception(message); - ex.setStatusCode(code); - ex.initCause(inner); - return ex; + return (S3Exception) S3Exception.builder() + .message(message) + .statusCode(code) + .cause(inner) + .build(); } protected void verifyTranslated( @@ -136,7 +141,7 @@ protected void verifyTranslated( } private static E verifyTranslated(Class clazz, - SdkBaseException exception) throws Exception { + SdkException exception) throws Exception { return verifyExceptionClass(clazz, translateException("test", "/", exception)); } @@ -157,16 +162,22 @@ public void testS3500isStatus500Exception() throws Exception { @Test public void test500isStatus500Exception() throws Exception { - AmazonServiceException ex = new AmazonServiceException(""); - ex.setStatusCode(500); + AwsServiceException ex = AwsServiceException.builder() + .message("") + .statusCode(500) + .build(); verifyTranslated(AWSStatus500Exception.class, ex); } @Test public void testExceptionsWithTranslatableMessage() throws Exception { - SdkBaseException xmlParsing = new SdkBaseException(EOF_MESSAGE_IN_XML_PARSER); - SdkBaseException differentLength = new SdkBaseException(EOF_READ_DIFFERENT_LENGTH); + SdkException xmlParsing = SdkException.builder() + .message(EOF_MESSAGE_IN_XML_PARSER) + .build(); + SdkException differentLength = SdkException.builder() + .message(EOF_READ_DIFFERENT_LENGTH) + .build(); verifyTranslated(EOFException.class, xmlParsing); verifyTranslated(EOFException.class, differentLength); @@ -178,7 +189,9 @@ public void testSdkDifferentLengthExceptionIsTranslatable() throws Throwable { final AtomicInteger counter = new AtomicInteger(0); 
invoker.retry("test", null, false, () -> { if (counter.incrementAndGet() < ACTIVE_RETRY_LIMIT) { - throw new SdkClientException(EOF_READ_DIFFERENT_LENGTH); + throw SdkClientException.builder() + .message(EOF_READ_DIFFERENT_LENGTH) + .build(); } }); @@ -190,7 +203,9 @@ public void testSdkXmlParsingExceptionIsTranslatable() throws Throwable { final AtomicInteger counter = new AtomicInteger(0); invoker.retry("test", null, false, () -> { if (counter.incrementAndGet() < ACTIVE_RETRY_LIMIT) { - throw new SdkClientException(EOF_MESSAGE_IN_XML_PARSER); + throw SdkClientException.builder() + .message(EOF_MESSAGE_IN_XML_PARSER) + .build(); } }); @@ -201,14 +216,36 @@ public void testSdkXmlParsingExceptionIsTranslatable() throws Throwable { public void testExtractConnectTimeoutException() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException(LOCAL_CONNECTION_TIMEOUT_EX))); + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build())); } @Test(expected = SocketTimeoutException.class) public void testExtractSocketTimeoutException() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException(SOCKET_TIMEOUT_EX))); + SdkException.builder() + .cause(SOCKET_TIMEOUT_EX) + .build())); + } + + @Test(expected = org.apache.hadoop.net.ConnectTimeoutException.class) + public void testExtractConnectTimeoutExceptionFromCompletionException() throws Throwable { + throw extractException("", "", + new CompletionException( + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build())); + } + + @Test(expected = SocketTimeoutException.class) + public void testExtractSocketTimeoutExceptionFromCompletionException() throws Throwable { + throw extractException("", "", + new CompletionException( + SdkException.builder() + .cause(SOCKET_TIMEOUT_EX) + .build())); } /** @@ -259,7 +296,7 @@ public void testRetryThrottled() throws Throwable { ex, retries, false); } - protected AmazonServiceException newThrottledException() { + protected AwsServiceException newThrottledException() { return serviceException( AWSServiceThrottledException.STATUS_CODE, "throttled"); } @@ -354,7 +391,9 @@ public void testUnshadedConnectionTimeoutExceptionMatching() // connection timeout exceptions are special, but as AWS shades // theirs, we need to string match them verifyTranslated(ConnectTimeoutException.class, - new AmazonClientException(HTTP_CONNECTION_TIMEOUT_EX)); + SdkException.builder() + .cause(HTTP_CONNECTION_TIMEOUT_EX) + .build()); } @Test @@ -362,14 +401,18 @@ public void testShadedConnectionTimeoutExceptionMatching() throws Throwable { // connection timeout exceptions are special, but as AWS shades // theirs, we need to string match them verifyTranslated(ConnectTimeoutException.class, - new AmazonClientException(LOCAL_CONNECTION_TIMEOUT_EX)); + SdkException.builder() + .cause(LOCAL_CONNECTION_TIMEOUT_EX) + .build()); } @Test public void testShadedConnectionTimeoutExceptionNotMatching() throws Throwable { InterruptedIOException ex = verifyTranslated(InterruptedIOException.class, - new AmazonClientException(new Local.NotAConnectTimeoutException())); + SdkException.builder() + .cause(new Local.NotAConnectTimeoutException()) + .build()); if (ex instanceof ConnectTimeoutException) { throw ex; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 
6456cb5e12a7f..4c56cd5c1c0ff 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -34,12 +34,14 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.InstanceProfileCredentialsProvider; import org.apache.hadoop.util.Sets; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -59,6 +61,7 @@ /** * Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. */ +// TODO: Add new tests that use a mix of V1 and V2 providers and assert that everything works ok. public class TestS3AAWSCredentialsProvider { /** @@ -73,7 +76,7 @@ public class TestS3AAWSCredentialsProvider { @Test public void testProviderWrongClass() throws Exception { expectProviderInstantiationFailure(this.getClass(), - NOT_AWS_PROVIDER); + NOT_AWS_V2_PROVIDER); } @Test @@ -103,7 +106,6 @@ public void testProviderFailureError() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testInstantiationChain() throws Throwable { Configuration conf = new Configuration(false); conf.set(AWS_CREDENTIALS_PROVIDER, @@ -123,7 +125,6 @@ public void testInstantiationChain() throws Throwable { } @Test - @SuppressWarnings("deprecation") public void testDefaultChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); Configuration conf = new Configuration(false); @@ -148,7 +149,6 @@ public void testDefaultChainNoURI() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testConfiguredChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); List> expectedClasses = @@ -167,7 +167,6 @@ public void testConfiguredChain() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testConfiguredChainUsesSharedInstanceProfile() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); Configuration conf = new Configuration(false); @@ -227,7 +226,7 @@ public void refresh() { * A credential provider whose constructor raises an NPE. 
*/ protected static class ConstructorFailureProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { @SuppressWarnings("unused") public ConstructorFailureProvider() { @@ -235,13 +234,10 @@ public ConstructorFailureProvider() { } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - } } @Test @@ -359,12 +355,12 @@ private static void assertCredentialProviders( List<Class<?>> expectedClasses, AWSCredentialProviderList list) { assertNotNull(list); - List<AWSCredentialsProvider> providers = list.getProviders(); + List<AwsCredentialsProvider> providers = list.getProviders(); assertEquals(expectedClasses.size(), providers.size()); for (int i = 0; i < expectedClasses.size(); ++i) { Class<?> expectedClass = expectedClasses.get(i); - AWSCredentialsProvider provider = providers.get(i); + AwsCredentialsProvider provider = providers.get(i); assertNotNull( String.format("At position %d, expected class is %s, but found null.", i, expectedClass), provider); @@ -380,7 +376,6 @@ private static void assertCredentialProviders( * @see S3ATestUtils#authenticationContains(Configuration, String). */ @Test - @SuppressWarnings("deprecation") public void testAuthenticationContainsProbes() { Configuration conf = new Configuration(false); assertFalse("found AssumedRoleCredentialProvider", @@ -398,7 +393,7 @@ public void testExceptionLogic() throws Throwable { // verify you can't get credentials from it NoAuthWithAWSException noAuth = intercept(NoAuthWithAWSException.class, AWSCredentialProviderList.NO_AWS_CREDENTIAL_PROVIDERS, - () -> providers.getCredentials()); + () -> providers.resolveCredentials()); // but that it closes safely providers.close(); @@ -447,11 +442,10 @@ public void testRefCounting() throws Throwable { providers.close(); assertEquals("Ref count after close() for " + providers, 0, providers.getRefCount()); - providers.refresh(); intercept(NoAuthWithAWSException.class, AWSCredentialProviderList.CREDENTIALS_REQUESTED_WHEN_CLOSED, - () -> providers.getCredentials()); + () -> providers.resolveCredentials()); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java index beeeb39fc4852..3335d70c52f04 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java @@ -31,7 +31,6 @@ import org.junit.Before; import org.junit.Test; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -112,16 +111,14 @@ public void testWriteOperationHelperPartLimits() throws Throwable { noopAuditor(conf), AuditTestSupport.NOOP_SPAN, new MinimalWriteOperationHelperCallbacks()); - ByteArrayInputStream inputStream = new ByteArrayInputStream( - "a".getBytes()); // first one works String key = "destKey"; - woh.newUploadPartRequest(key, - "uploadId", 1, 1024, inputStream, null, 0L); + woh.newUploadPartRequestBuilder(key, + "uploadId", 1, 1024); // but ask past the limit and a PathIOE is raised intercept(PathIOException.class, key, - () -> woh.newUploadPartRequest(key, - "uploadId", 50000, 1024, inputStream, null, 0L)); + () -> woh.newUploadPartRequestBuilder(key, + "uploadId", 50000, 1024)); } static class StreamClosedException extends IOException {} diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index 62a99d7209263..fab0b67b376a7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -27,16 +27,16 @@ import java.net.URI; import java.util.Date; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.junit.Test; import org.mockito.ArgumentMatcher; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + /** * deleteOnExit test for S3A. */ @@ -74,25 +74,25 @@ public void testDeleteOnExit() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); testFs.initialize(uri, conf); - AmazonS3 testS3 = testFs.getAmazonS3ClientForTesting("mocking"); + S3Client testS3 = testFs.getAmazonS3V2ClientForTesting("mocking"); Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); - ObjectMetadata meta = new ObjectMetadata(); - meta.setContentLength(1L); - meta.setLastModified(new Date(2L)); - when(testS3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) - .thenReturn(meta); + HeadObjectResponse objectMetadata = + HeadObjectResponse.builder().contentLength(1L).lastModified(new Date(2L).toInstant()) + .build(); + when(testS3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + .thenReturn(objectMetadata); testFs.deleteOnExit(path); testFs.close(); assertEquals(0, testFs.getDeleteOnDnExitCount()); } - private ArgumentMatcher<GetObjectMetadataRequest> correctGetMetadataRequest( + private ArgumentMatcher<HeadObjectRequest> correctGetMetadataRequest( String bucket, String key) { return request -> request != null - && request.getBucketName().equals(bucket) - && request.getKey().equals(key); + && request.bucket().equals(bucket) + && request.key().equals(key); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java index fd649c436bf59..c41752fb2b0b7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java @@ -21,7 +21,7 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.junit.Assert.*; import java.io.EOFException; @@ -33,15 +33,17 @@ import java.util.Collections; import java.util.Map; import java.util.concurrent.ExecutionException; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.services.s3.model.AmazonS3Exception; +import java.util.function.Consumer; import org.junit.Test; import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; +import 
software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.S3Exception; + import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; /** @@ -60,13 +62,13 @@ public class TestS3AExceptionTranslation { @Test public void test301ContainsEndpoint() throws Exception { String bucket = "bucket.s3-us-west-2.amazonaws.com"; - int sc301 = 301; - AmazonS3Exception s3Exception = createS3Exception("wrong endpoint", sc301, + S3Exception s3Exception = createS3Exception("wrong endpoint", + SC_301_MOVED_PERMANENTLY, Collections.singletonMap(S3AUtils.ENDPOINT_KEY, bucket)); AWSRedirectException ex = verifyTranslated( AWSRedirectException.class, s3Exception); - assertStatusCode(sc301, ex); + assertStatusCode(SC_301_MOVED_PERMANENTLY, ex); assertNotNull(ex.getMessage()); assertContained(ex.getMessage(), bucket); @@ -88,17 +90,17 @@ protected void verifyTranslated( @Test public void test400isBad() throws Exception { - verifyTranslated(400, AWSBadRequestException.class); + verifyTranslated(SC_400_BAD_REQUEST, AWSBadRequestException.class); } @Test public void test401isNotPermittedFound() throws Exception { - verifyTranslated(401, AccessDeniedException.class); + verifyTranslated(SC_401_UNAUTHORIZED, AccessDeniedException.class); } @Test public void test403isNotPermittedFound() throws Exception { - verifyTranslated(403, AccessDeniedException.class); + verifyTranslated(SC_403_FORBIDDEN, AccessDeniedException.class); } /** @@ -106,7 +108,7 @@ public void test403isNotPermittedFound() throws Exception { */ @Test public void test404isNotFound() throws Exception { - verifyTranslated(SC_404, FileNotFoundException.class); + verifyTranslated(SC_404_NOT_FOUND, FileNotFoundException.class); } /** @@ -114,8 +116,11 @@ public void test404isNotFound() throws Exception { */ @Test public void testUnknownBucketException() throws Exception { - AmazonS3Exception ex404 = createS3Exception(SC_404); - ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); + S3Exception ex404 = createS3Exception(b -> b + .statusCode(SC_404_NOT_FOUND) + .awsErrorDetails(AwsErrorDetails.builder() + .errorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET) + .build())); verifyTranslated( UnknownStoreException.class, ex404); @@ -123,12 +128,12 @@ public void testUnknownBucketException() throws Exception { @Test public void test410isNotFound() throws Exception { - verifyTranslated(410, FileNotFoundException.class); + verifyTranslated(SC_410_GONE, FileNotFoundException.class); } @Test public void test416isEOF() throws Exception { - verifyTranslated(416, EOFException.class); + verifyTranslated(SC_416_RANGE_NOT_SATISFIABLE, EOFException.class); } @Test @@ -143,19 +148,21 @@ public void testGenericS3Exception() throws Exception { @Test public void testGenericServiceS3Exception() throws Exception { // service exception of no known type - AmazonServiceException ase = new AmazonServiceException("unwind"); - ase.setStatusCode(500); + AwsServiceException ase = AwsServiceException.builder() + .message("unwind") + .statusCode(SC_500_INTERNAL_SERVER_ERROR) + .build(); AWSServiceIOException ex = verifyTranslated( AWSStatus500Exception.class, ase); - assertStatusCode(500, ex); + assertStatusCode(SC_500_INTERNAL_SERVER_ERROR, ex); } protected void assertStatusCode(int expected, AWSServiceIOException ex) { assertNotNull("Null exception", ex); - if (expected != 
ex.getStatusCode()) { + if (expected != ex.statusCode()) { throw new AssertionError("Expected status code " + expected - + "but got " + ex.getStatusCode(), + + " but got " + ex.statusCode(), ex); } } @@ -164,23 +171,38 @@ protected void assertStatusCode(int expected, AWSServiceIOException ex) { public void testGenericClientException() throws Exception { // Generic Amazon exception verifyTranslated(AWSClientIOException.class, - new AmazonClientException("")); + SdkException.builder().message("").build()); + } + + private static S3Exception createS3Exception( + Consumer<S3Exception.Builder> consumer) { + S3Exception.Builder builder = S3Exception.builder() + .awsErrorDetails(AwsErrorDetails.builder() + .build()); + consumer.accept(builder); + return (S3Exception) builder.build(); } - private static AmazonS3Exception createS3Exception(int code) { - return createS3Exception("", code, null); + private static S3Exception createS3Exception(int code) { + return createS3Exception(b -> b.message("").statusCode(code)); } - private static AmazonS3Exception createS3Exception(String message, int code, + private static S3Exception createS3Exception(String message, int code, Map<String, String> additionalDetails) { - AmazonS3Exception source = new AmazonS3Exception(message); - source.setStatusCode(code); - source.setAdditionalDetails(additionalDetails); + S3Exception source = (S3Exception) S3Exception.builder() + .message(message) + .statusCode(code) + .build(); + // TODO: is there an equivalent for v2? + // currently used to retrieve endpoint on redirect + // see S3AUtils.translateException and + // https://github.com/aws/aws-sdk-java-v2/issues/3048 + // source.setAdditionalDetails(additionalDetails); return source; } private static <E extends Throwable> E verifyTranslated(Class<E> clazz, - AmazonClientException exception) throws Exception { + SdkException exception) throws Exception { // Verifying that the translated exception have the correct error message. 
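// translateException() is the single v1-to-v2 mapping point exercised by these tests; note the v2 SdkException hierarchy is created through builders, not constructors.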
IOException ioe = translateException("test", "/", exception); assertExceptionContains(exception.getMessage(), ioe, @@ -212,16 +234,18 @@ public void testInterruptExceptionDetecting() throws Throwable { public void testExtractInterrupted() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException( - new InterruptedException("")))); + SdkException.builder() + .cause(new InterruptedException("")) + .build())); } @Test(expected = InterruptedIOException.class) public void testExtractInterruptedIO() throws Throwable { throw extractException("", "", new ExecutionException( - new AmazonClientException( - new InterruptedIOException("")))); + SdkException.builder() + .cause(new InterruptedIOException("")) + .build())); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 34a275b580f25..541f3b0486191 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -21,22 +21,14 @@ import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.io.FileNotFoundException; +import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3ObjectSummary; - import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -44,6 +36,15 @@ import org.junit.Test; import org.mockito.ArgumentMatcher; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; + /** * S3A tests for getFileStatus using mock S3 client. 
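* All S3 interactions are served by the mocked v2 S3Client held in the s3V2 field.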
*/ @@ -53,17 +54,17 @@ public class TestS3AGetFileStatus extends AbstractS3AMockTest { public void testFile() throws Exception { Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); - ObjectMetadata meta = new ObjectMetadata(); - meta.setContentLength(1L); - meta.setLastModified(new Date(2L)); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) - .thenReturn(meta); + HeadObjectResponse objectMetadata = + HeadObjectResponse.builder().contentLength(1L).lastModified(new Date(2L).toInstant()) + .build(); + when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + .thenReturn(objectMetadata); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); assertTrue(stat.isFile()); - assertEquals(meta.getContentLength(), stat.getLen()); - assertEquals(meta.getLastModified().getTime(), stat.getModificationTime()); + assertEquals(objectMetadata.contentLength().longValue(), stat.getLen()); + assertEquals(Date.from(objectMetadata.lastModified()).getTime(), stat.getModificationTime()); ContractTestUtils.assertNotErasureCoded(fs, path); assertTrue(path + " should have erasure coding unset in " + "FileStatus#toString(): " + stat, @@ -74,17 +75,16 @@ public void testFile() throws Exception { public void testFakeDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); String keyDir = key + "/"; - ListObjectsV2Result listResult = new ListObjectsV2Result(); - S3ObjectSummary objectSummary = new S3ObjectSummary(); - objectSummary.setKey(keyDir); - objectSummary.setSize(0L); - listResult.getObjectSummaries().add(objectSummary); - when(s3.listObjectsV2(argThat( + List<S3Object> s3Objects = new ArrayList<>(1); + s3Objects.add(S3Object.builder().key(keyDir).size(0L).build()); + ListObjectsV2Response listObjectsV2Response = + ListObjectsV2Response.builder().contents(s3Objects).build(); + when(s3V2.listObjectsV2(argThat( matchListV2Request(BUCKET, keyDir)) - )).thenReturn(listResult); + )).thenReturn(listObjectsV2Response); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -95,12 +95,13 @@ public void testFakeDirectory() throws Exception { public void testImplicitDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3V2.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/")) )).thenThrow(NOT_FOUND); - setupListMocks(Collections.singletonList("dir/"), Collections.emptyList()); + setupListMocks(Collections.singletonList(CommonPrefix.builder().prefix("dir/").build()), + Collections.emptyList()); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -115,9 +116,9 @@ public void testImplicitDirectory() throws Exception { public void testRoot() throws Exception { Path path = new Path("/"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + 
when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3V2.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -132,9 +133,9 @@ public void testRoot() throws Exception { public void testNotFound() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3.getObjectMetadata(argThat( + when(s3V2.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -142,36 +143,38 @@ public void testNotFound() throws Exception { fs.getFileStatus(path); } - private void setupListMocks(List<String> prefixes, - List<S3ObjectSummary> summaries) { - + private void setupListMocks(List<CommonPrefix> prefixes, + List<S3Object> s3Objects) { // V1 list API mock - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn(prefixes); - when(objects.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + ListObjectsResponse v1Response = ListObjectsResponse.builder() + .commonPrefixes(prefixes) + .contents(s3Objects) + .build(); + when(s3V2.listObjects(any(ListObjectsRequest.class))).thenReturn(v1Response); // V2 list API mock - ListObjectsV2Result v2Result = mock(ListObjectsV2Result.class); - when(v2Result.getCommonPrefixes()).thenReturn(prefixes); - when(v2Result.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjectsV2(any(ListObjectsV2Request.class))) - .thenReturn(v2Result); + ListObjectsV2Response v2Result = ListObjectsV2Response.builder() + .commonPrefixes(prefixes) + .contents(s3Objects) + .build(); + when(s3V2.listObjectsV2( + any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))).thenReturn( + v2Result); } - private ArgumentMatcher<GetObjectMetadataRequest> correctGetMetadataRequest( + private ArgumentMatcher<HeadObjectRequest> correctGetMetadataRequest( String bucket, String key) { return request -> request != null - && request.getBucketName().equals(bucket) - && request.getKey().equals(key); + && request.bucket().equals(bucket) + && request.key().equals(key); } private ArgumentMatcher<ListObjectsV2Request> matchListV2Request( String bucket, String key) { return (ListObjectsV2Request request) -> { return request != null - && request.getBucketName().equals(bucket) - && request.getPrefix().equals(key); + && request.bucket().equals(bucket) + && request.prefix().equals(key); }; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java index c62bf5daca3a4..58f045828c77c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java @@ -19,16 +19,12 @@ package org.apache.hadoop.fs.s3a; import javax.net.ssl.SSLException; +import java.io.FilterInputStream; import java.io.IOException; import java.net.SocketException; import java.nio.charset.StandardCharsets; import java.util.concurrent.CompletableFuture; -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.model.GetObjectRequest; -import 
com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; import org.junit.Test; import org.apache.commons.io.IOUtils; @@ -37,6 +33,13 @@ import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import static java.lang.Math.min; import static org.apache.hadoop.util.functional.FutureIO.eval; import static org.junit.Assert.assertArrayEquals; @@ -121,13 +124,22 @@ private S3AInputStream getMockedS3AInputStream() { * @return mocked object. */ private S3AInputStream.InputStreamCallbacks getMockedInputStreamCallback() { - return new S3AInputStream.InputStreamCallbacks() { + GetObjectResponse objectResponse = GetObjectResponse.builder() + .eTag("test-etag") + .build(); + + ResponseInputStream<GetObjectResponse>[] responseInputStreams = + new ResponseInputStream[] { + getMockedInputStream(objectResponse, true), + getMockedInputStream(objectResponse, true), + getMockedInputStream(objectResponse, false) + }; - private final S3Object mockedS3Object = getMockedS3Object(); + return new S3AInputStream.InputStreamCallbacks() { private Integer mockedS3ObjectIndex = 0; @Override - public S3Object getObject(GetObjectRequest request) { + public ResponseInputStream<GetObjectResponse> getObject(GetObjectRequest request) { // Set s3 client to return mocked s3object with defined read behavior. mockedS3ObjectIndex++; // open() -> lazySeek() -> reopen() @@ -144,14 +156,17 @@ public S3Object getObject(GetObjectRequest request) { // -> getObjectContent(objectInputStreamGood)-> objectInputStreamGood // -> wrappedStream.read if (mockedS3ObjectIndex == 3) { - throw new SdkClientException("Failed to get S3Object"); + throw AwsServiceException.builder() + .message("Failed to get S3Object") + .awsErrorDetails(AwsErrorDetails.builder().errorCode("test-code").build()) + .build(); } - return mockedS3Object; + return responseInputStreams[min(mockedS3ObjectIndex, responseInputStreams.length) - 1]; } @Override - public GetObjectRequest newGetRequest(String key) { - return new GetObjectRequest(fs.getBucket(), key); + public GetObjectRequest.Builder newGetRequestBuilder(String key) { + return GetObjectRequest.builder().bucket(fs.getBucket()).key(key); } @Override @@ -166,70 +181,41 @@ public void close() { } } /** - * Get mocked S3Object that returns bad input stream on the initial of - * getObjectContent calls. - * - * @return mocked object. - */ - private S3Object getMockedS3Object() { - S3ObjectInputStream objectInputStreamBad1 = getMockedInputStream(true); - S3ObjectInputStream objectInputStreamBad2 = getMockedInputStream(true); - S3ObjectInputStream objectInputStreamGood = getMockedInputStream(false); - - return new S3Object() { - private final S3ObjectInputStream[] inputStreams = - {objectInputStreamBad1, objectInputStreamBad2, objectInputStreamGood}; - - private Integer inputStreamIndex = 0; - - @Override - public S3ObjectInputStream getObjectContent() { - // Set getObjectContent behavior: - // Returns bad stream twice, and good stream afterwards. 
- inputStreamIndex++; - return inputStreams[min(inputStreamIndex, inputStreams.length) - 1]; - } - - @Override - public ObjectMetadata getObjectMetadata() { - // Set getObjectMetadata behavior: returns dummy metadata - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setHeader("ETag", "test-etag"); - return metadata; - } - }; - } - - /** - * Get mocked S3ObjectInputStream where we can trigger IOException to + * Get mocked ResponseInputStream where we can trigger IOException to * simulate the read failure. * * @param triggerFailure true when a failure injection is enabled. * @return mocked object. */ - private S3ObjectInputStream getMockedInputStream(boolean triggerFailure) { - return new S3ObjectInputStream(IOUtils.toInputStream(INPUT, StandardCharsets.UTF_8), null) { - - private final IOException exception = - new SSLException(new SocketException("Connection reset")); - - @Override - public int read() throws IOException { - int result = super.read(); - if (triggerFailure) { - throw exception; - } - return result; - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - int result = super.read(b, off, len); - if (triggerFailure) { - throw exception; - } - return result; - } - }; + private ResponseInputStream<GetObjectResponse> getMockedInputStream( + GetObjectResponse objectResponse, boolean triggerFailure) { + + FilterInputStream inputStream = + new FilterInputStream(IOUtils.toInputStream(INPUT, StandardCharsets.UTF_8)) { + + private final IOException exception = + new SSLException(new SocketException("Connection reset")); + + @Override + public int read() throws IOException { + int result = super.read(); + if (triggerFailure) { + throw exception; + } + return result; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int result = super.read(b, off, len); + if (triggerFailure) { + throw exception; + } + return result; + } + }; + + return new ResponseInputStream<>(objectResponse, + AbortableInputStream.create(inputStream, () -> {})); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java index 0e105c25c3a45..43f55a2a7c48d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java @@ -18,17 +18,21 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; - import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.junit.Test; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + import java.io.IOException; -import java.util.Date; +import java.io.InputStream; +import java.time.Instant; import static org.junit.Assert.assertEquals; @@ -40,10 +44,10 @@ import static org.mockito.Mockito.when; /** - * Uses mocks to check that the {@link S3ObjectInputStream} is closed when - * {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer} is called. 
Unlike the - * other unbuffer tests, this specifically tests that the underlying S3 object - * stream is closed. + * Uses mocks to check that the {@link ResponseInputStream} is + * closed when {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer} is called. + * Unlike the other unbuffer tests, this specifically tests that the underlying + * S3 object stream is closed. */ public class TestS3AUnbuffer extends AbstractS3AMockTest { @@ -51,22 +55,27 @@ public class TestS3AUnbuffer extends AbstractS3AMockTest { public void testUnbuffer() throws IOException { // Create mock ObjectMetadata for getFileStatus() Path path = new Path("/file"); - ObjectMetadata meta = mock(ObjectMetadata.class); - when(meta.getContentLength()).thenReturn(1L); - when(meta.getLastModified()).thenReturn(new Date(2L)); - when(meta.getETag()).thenReturn("mock-etag"); - when(s3.getObjectMetadata(any())).thenReturn(meta); + HeadObjectResponse objectMetadata = HeadObjectResponse.builder() + .contentLength(1L) + .lastModified(Instant.ofEpochMilli(2L)) + .eTag("mock-etag") + .build(); + when(s3V2.headObject((HeadObjectRequest) any())).thenReturn(objectMetadata); - // Create mock S3ObjectInputStream and S3Object for open() - S3ObjectInputStream objectStream = mock(S3ObjectInputStream.class); + // Create mock ResponseInputStream and GetObjectResponse for open() + GetObjectResponse objectResponse = GetObjectResponse.builder() + .contentLength(1L) + .lastModified(Instant.ofEpochMilli(2L)) + .eTag("mock-etag") + .build(); + InputStream objectStream = mock(InputStream.class); when(objectStream.read()).thenReturn(-1); when(objectStream.read(any(byte[].class))).thenReturn(-1); when(objectStream.read(any(byte[].class), anyInt(), anyInt())).thenReturn(-1); - - S3Object s3Object = mock(S3Object.class); - when(s3Object.getObjectContent()).thenReturn(objectStream); - when(s3Object.getObjectMetadata()).thenReturn(meta); - when(s3.getObject(any())).thenReturn(s3Object); + ResponseInputStream<GetObjectResponse> getObjectResponseInputStream = + new ResponseInputStream<>(objectResponse, + AbortableInputStream.create(objectStream, () -> {})); + when(s3V2.getObject((GetObjectRequest) any())).thenReturn(getObjectResponseInputStream); // Call read and then unbuffer FSDataInputStream stream = fs.open(path); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index 42de7cdffc80e..55b45a0399dd1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -18,18 +18,18 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.SdkBaseException; -import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.transfer.model.CopyResult; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import 
software.amazon.awssdk.services.s3.model.CopyObjectResult; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; @@ -40,6 +40,7 @@ import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.createPolicy; import static org.apache.hadoop.fs.s3a.impl.ChangeTracker.CHANGE_REPORTED_BY_S3; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_412_PRECONDITION_FAILED; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -68,7 +69,7 @@ public void testVersionCheckingHandlingNoVersions() throws Throwable { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied contraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, null), "", 0); @@ -96,7 +97,7 @@ public void testEtagCheckingWarn() throws Throwable { ChangeDetectionPolicy.Source.ETag, false); assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse("e1", null), "", 0); @@ -122,13 +123,13 @@ public void testVersionCheckingOnClient() throws Throwable { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, "rev1"), "", 0); assertTrackerMismatchCount(tracker, 0); assertRevisionId(tracker, "rev1"); - GetObjectRequest request = newGetObjectRequest(); + GetObjectRequest request = newGetObjectRequestBuilder().build(); expectChangeException(tracker, newResponse(null, "rev2"), "change detected"); // mismatch was noted (so gets to FS stats) @@ -149,14 +150,14 @@ public void testVersionCheckingOnServer() throws Throwable { ChangeDetectionPolicy.Source.VersionId, false); assertFalse("Tracker should not have applied contraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); tracker.processResponse( newResponse(null, "rev1"), "", 0); assertTrackerMismatchCount(tracker, 0); assertRevisionId(tracker, "rev1"); - GetObjectRequest request = newGetObjectRequest(); - assertConstraintApplied(tracker, request); + GetObjectRequest.Builder builder = newGetObjectRequestBuilder(); + assertConstraintApplied(tracker, builder); // now, the tracker expects a null response expectChangeException(tracker, null, CHANGE_REPORTED_BY_S3); assertTrackerMismatchCount(tracker, 1); @@ -249,31 +250,33 @@ public void testCopyVersionMismatch() throws Throwable { // 412 is translated to RemoteFileChangedException // note: this scenario is never currently hit due to // https://github.com/aws/aws-sdk-java/issues/1644 - AmazonServiceException awsException = - new AmazonServiceException("aws exception"); - awsException.setStatusCode(ChangeTracker.SC_PRECONDITION_FAILED); + AwsServiceException awsException = + AwsServiceException.builder() + .message("aws exception") + .statusCode(SC_412_PRECONDITION_FAILED) + .build(); 
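// With the v2 SDK the status code can only be set through the exception builder; translation reads it back via AwsServiceException.statusCode().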
expectChangeException(tracker, awsException, "copy", RemoteFileChangedException.PRECONDITIONS_FAILED); // processing another type of exception does nothing - tracker.processException(new SdkBaseException("foo"), "copy"); + tracker.processException(SdkException.builder().message("foo").build(), "copy"); } protected void assertConstraintApplied(final ChangeTracker tracker, - final GetObjectRequest request) { + final GetObjectRequest.Builder builder) { assertTrue("Tracker should have applied contraints " + tracker, - tracker.maybeApplyConstraint(request)); + tracker.maybeApplyConstraint(builder)); } protected void assertConstraintApplied(final ChangeTracker tracker, - final CopyObjectRequest request) throws PathIOException { + final CopyObjectRequest.Builder requestBuilder) throws PathIOException { assertTrue("Tracker should have applied contraints " + tracker, - tracker.maybeApplyConstraint(request)); + tracker.maybeApplyConstraint(requestBuilder)); } protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, RemoteFileChangedException.class); @@ -281,7 +284,7 @@ protected RemoteFileChangedException expectChangeException( protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, - final SdkBaseException exception, + final SdkException exception, final String operation, final String message) throws Exception { return expectException(tracker, exception, operation, message, @@ -290,7 +293,7 @@ protected RemoteFileChangedException expectChangeException( protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, NoVersionAttributeException.class); @@ -298,7 +301,7 @@ protected PathIOException expectNoVersionAttributeException( protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, - final CopyResult response, + final CopyObjectResponse response, final String message) throws Exception { return expectException(tracker, response, message, NoVersionAttributeException.class); @@ -306,7 +309,7 @@ protected PathIOException expectNoVersionAttributeException( protected <T extends Exception> T expectException( final ChangeTracker tracker, - final S3Object response, + final GetObjectResponse response, final String message, final Class<T> clazz) throws Exception { return intercept( @@ -320,7 +323,7 @@ protected <T extends Exception> T expectException( protected <T extends Exception> T expectException( final ChangeTracker tracker, - final CopyResult response, + final CopyObjectResponse response, final String message, final Class<T> clazz) throws Exception { return intercept( @@ -334,7 +337,7 @@ protected <T extends Exception> T expectException( protected <T extends Exception> T expectException( final ChangeTracker tracker, - final SdkBaseException exception, + final SdkException exception, final String operation, final String message, final Class<T> clazz) throws Exception { @@ -389,48 +392,36 @@ protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode, if (objectAttributes.getVersionId() == null && objectAttributes.getETag() == null) { assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + tracker.maybeApplyConstraint(newGetObjectRequestBuilder())); } return tracker; } - private GetObjectRequest 
newGetObjectRequest() { - return new GetObjectRequest(BUCKET, OBJECT); + private GetObjectRequest.Builder newGetObjectRequestBuilder() { + return GetObjectRequest.builder().bucket(BUCKET).key(OBJECT); } - private CopyObjectRequest newCopyObjectRequest() { - return new CopyObjectRequest(BUCKET, OBJECT, BUCKET, DEST_OBJECT); + private CopyObjectRequest.Builder newCopyObjectRequest() { + return CopyObjectRequest.builder().sourceBucket(BUCKET).sourceKey(OBJECT) + .destinationBucket(BUCKET).destinationKey(DEST_OBJECT); } - private CopyResult newCopyResult(String eTag, String versionId) { - CopyResult copyResult = new CopyResult(); - copyResult.setSourceBucketName(BUCKET); - copyResult.setSourceKey(OBJECT); - copyResult.setDestinationBucketName(BUCKET); - copyResult.setDestinationKey(DEST_OBJECT); - copyResult.setETag(eTag); - copyResult.setVersionId(versionId); - return copyResult; + private CopyObjectResponse newCopyResult(String eTag, String versionId) { + CopyObjectResponse.Builder copyObjectResponseBuilder = CopyObjectResponse.builder(); + + return copyObjectResponseBuilder.versionId(versionId) + .copyObjectResult(CopyObjectResult.builder().eTag(eTag).build()).build(); } - private S3Object newResponse(String etag, String versionId) { - ObjectMetadata md = new ObjectMetadata(); + private GetObjectResponse newResponse(String etag, String versionId) { + GetObjectResponse.Builder builder = GetObjectResponse.builder(); if (etag != null) { - md.setHeader(Headers.ETAG, etag); + builder.eTag(etag); } if (versionId != null) { - md.setHeader(Headers.S3_VERSION_ID, versionId); + builder.versionId(versionId); } - S3Object response = emptyResponse(); - response.setObjectMetadata(md); - return response; - } - - private S3Object emptyResponse() { - S3Object response = new S3Object(); - response.setBucketName(BUCKET); - response.setKey(OBJECT); - return response; + return builder.build(); } private S3ObjectAttributes objectAttributes( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java index 298c1444bb9b3..a629f1c478a79 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java @@ -21,13 +21,11 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.net.URI; import java.util.Map; import java.util.function.Consumer; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; import org.junit.After; import org.junit.Before; import org.slf4j.Logger; @@ -42,6 +40,13 @@ import org.apache.hadoop.fs.store.audit.AuditSpan; import org.apache.hadoop.test.AbstractHadoopTestBase; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; + import static org.apache.hadoop.fs.s3a.Statistic.INVOCATION_GET_FILE_STATUS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.UNAUDITED_OPERATION; import static 
org.apache.hadoop.fs.s3a.audit.AuditTestSupport.createIOStatisticsStoreForAuditing; @@ -137,11 +142,24 @@ protected AuditSpanS3A activeSpan() { /** * Create a head request and pass it through the manager's beforeExecution() * callback. + * * @return a processed request. */ - protected GetObjectMetadataRequest head() { - return manager.beforeExecution( - requestFactory.newGetObjectMetadataRequest("/")); + protected SdkHttpRequest head() { + HeadObjectRequest.Builder headObjectRequestBuilder = + requestFactory.newHeadObjectRequestBuilder("/"); + manager.requestCreated(headObjectRequestBuilder); + HeadObjectRequest headObjectRequest = headObjectRequestBuilder.build(); + ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build(); + InterceptorContext context = InterceptorContext.builder() + .request(headObjectRequest) + .httpRequest(SdkHttpRequest.builder() + .uri(URI.create("https://test")) + .method(SdkHttpMethod.HEAD) + .build()) + .build(); + manager.beforeExecution(context, executionAttributes); + return manager.modifyHttpRequest(context, executionAttributes); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java index ad72d75081b27..1520e588e544e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AuditTestSupport.java @@ -30,7 +30,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_SPAN_CREATION; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.LOGGING_AUDIT_SERVICE; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.NOOP_AUDIT_SERVICE; @@ -119,7 +119,7 @@ public static Configuration resetAuditOptions(Configuration conf) { S3ATestUtils.removeBaseAndBucketOverrides(conf, REFERRER_HEADER_ENABLED, REJECT_OUT_OF_SPAN_OPERATIONS, - AUDIT_REQUEST_HANDLERS, + AUDIT_EXECUTION_INTERCEPTORS, AUDIT_SERVICE_CLASSNAME, AUDIT_ENABLED); return conf; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java index 9e6d82ce6ac75..bd60165ebe42e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java @@ -33,7 +33,7 @@ import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.enableLoggingAuditor; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.resetAuditOptions; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.UNAUDITED_OPERATION; import static 
org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic; @@ -57,8 +57,8 @@ public Configuration createConfiguration() { Configuration conf = super.createConfiguration(); resetAuditOptions(conf); enableLoggingAuditor(conf); - conf.set(AUDIT_REQUEST_HANDLERS, - SimpleAWSRequestHandler.CLASS); + conf.set(AUDIT_EXECUTION_INTERCEPTORS, + SimpleAWSExecutionInterceptor.CLASS); return conf; } @@ -117,14 +117,14 @@ public void testInvokeOutOfSpanRejected() throws Throwable { public void testRequestHandlerBinding() throws Throwable { describe("Verify that extra request handlers can be added and that they" + " will be invoked during request execution"); - final long baseCount = SimpleAWSRequestHandler.getInvocationCount(); + final long baseCount = SimpleAWSExecutionInterceptor.getInvocationCount(); final S3AFileSystem fs = getFileSystem(); final long exec0 = lookupCounterStatistic(iostats(), AUDIT_REQUEST_EXECUTION.getSymbol()); // API call to a known path, `getBucketLocation()` does not always result in an API call. fs.listStatus(path("/")); // which MUST have ended up calling the extension request handler - Assertions.assertThat(SimpleAWSRequestHandler.getInvocationCount()) + Assertions.assertThat(SimpleAWSExecutionInterceptor.getInvocationCount()) .describedAs("Invocation count of plugged in request handler") .isGreaterThan(baseCount); assertThatStatisticCounter(iostats(), AUDIT_REQUEST_EXECUTION.getSymbol()) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java similarity index 68% rename from hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java index 6f5a0445a92f7..8014b05187387 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSRequestHandler.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java @@ -20,28 +20,28 @@ import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.handlers.RequestHandler2; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; /** - * Simple AWS handler to verify dynamic loading of extra request - * handlers during auditing setup. + * Simple AWS interceptor to verify dynamic loading of extra + * execution interceptors during auditing setup. * The invocation counter tracks the count of calls to - * {@link #beforeExecution(AmazonWebServiceRequest)}. + * {@link #beforeExecution}. */ -public final class SimpleAWSRequestHandler extends RequestHandler2 { +public final class SimpleAWSExecutionInterceptor implements ExecutionInterceptor { public static final String CLASS - = "org.apache.hadoop.fs.s3a.audit.SimpleAWSRequestHandler"; + = "org.apache.hadoop.fs.s3a.audit.SimpleAWSExecutionInterceptor"; /** Count of invocations. 
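* Static so tests can read the count via getInvocationCount() without a reference to the interceptor instance loaded by the audit manager.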
*/ private static final AtomicLong INVOCATIONS = new AtomicLong(0); @Override - public AmazonWebServiceRequest beforeExecution( - final AmazonWebServiceRequest request) { + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { INVOCATIONS.incrementAndGet(); - return request; } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java index 7cdab4c4b75e0..252500c0c1056 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java @@ -22,9 +22,6 @@ import java.nio.file.AccessDeniedException; import java.util.List; -import com.amazonaws.DefaultRequest; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -39,13 +36,21 @@ import org.apache.hadoop.service.Service; import org.apache.hadoop.test.AbstractHadoopTestBase; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; + import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.retrieveAttachedSpan; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.createIOStatisticsStoreForAuditing; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditConfig; -import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME; +import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_EXECUTION_ATTRIBUTE; import static org.apache.hadoop.service.ServiceAssert.assertServiceStateStarted; import static org.apache.hadoop.service.ServiceAssert.assertServiceStateStopped; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -159,30 +164,51 @@ public void testAuditManagerLifecycle() throws Throwable { } @Test - public void testSingleRequestHandler() throws Throwable { + public void testSingleExecutionInterceptor() throws Throwable { AuditManagerS3A manager = AuditIntegration.createAndStartAuditManager( noopAuditConfig(), ioStatistics); - List<RequestHandler2> handlers - = manager.createRequestHandlers(); - assertThat(handlers) + List<ExecutionInterceptor> interceptors + = manager.createExecutionInterceptors(); + assertThat(interceptors) .hasSize(1); - RequestHandler2 handler = handlers.get(0); + ExecutionInterceptor interceptor = interceptors.get(0); + RequestFactory requestFactory = RequestFactoryImpl.builder() .withBucket("bucket") .build(); + HeadObjectRequest.Builder requestBuilder = + requestFactory.newHeadObjectRequestBuilder("/"); + + assertThat(interceptor instanceof AWSAuditEventCallbacks).isTrue(); + ((AWSAuditEventCallbacks)interceptor).requestCreated(requestBuilder); + + HeadObjectRequest 
request = requestBuilder.build(); + SdkHttpRequest httpRequest = SdkHttpRequest.builder() + .protocol("https") + .host("test") + .method(SdkHttpMethod.HEAD) + .build(); + + ExecutionAttributes attributes = ExecutionAttributes.builder().build(); + InterceptorContext context = InterceptorContext.builder() + .request(request) + .httpRequest(httpRequest) + .build(); + // test the basic pre-request sequence while avoiding // the complexity of recreating the full sequence // (and probably getting it wrong) - GetObjectMetadataRequest r - = requestFactory.newGetObjectMetadataRequest("/"); - DefaultRequest dr = new DefaultRequest(r, "S3"); - assertThat(handler.beforeMarshalling(r)) - .isNotNull(); - assertThat(handler.beforeExecution(r)) - .isNotNull(); - handler.beforeRequest(dr); - + // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html + interceptor.beforeExecution(context, attributes); + interceptor.modifyRequest(context, attributes); + interceptor.beforeMarshalling(context, attributes); + interceptor.afterMarshalling(context, attributes); + interceptor.modifyHttpRequest(context, attributes); + interceptor.beforeTransmission(context, attributes); + AuditSpanS3A span = attributes.getAttribute(AUDIT_SPAN_EXECUTION_ATTRIBUTE); + assertThat(span).isNotNull(); + assertThat(span.isValidSpan()).isFalse(); } /** @@ -192,14 +218,14 @@ public void testSingleRequestHandler() throws Throwable { public void testRequestHandlerLoading() throws Throwable { Configuration conf = noopAuditConfig(); conf.setClassLoader(this.getClass().getClassLoader()); - conf.set(AUDIT_REQUEST_HANDLERS, - SimpleAWSRequestHandler.CLASS); + conf.set(AUDIT_EXECUTION_INTERCEPTORS, + SimpleAWSExecutionInterceptor.CLASS); AuditManagerS3A manager = AuditIntegration.createAndStartAuditManager( conf, ioStatistics); - assertThat(manager.createRequestHandlers()) + assertThat(manager.createExecutionInterceptors()) .hasSize(2) - .hasAtLeastOneElementOfType(SimpleAWSRequestHandler.class); + .hasAtLeastOneElementOfType(SimpleAWSExecutionInterceptor.class); } @Test @@ -216,8 +242,8 @@ public void testLoggingAuditorBinding() throws Throwable { @Test public void testNoopAuditManager() throws Throwable { AuditManagerS3A manager = AuditIntegration.stubAuditManager(); - assertThat(manager.createStateChangeListener()) - .describedAs("transfer state change listener") + assertThat(manager.createTransferListener()) + .describedAs("transfer listener") .isNotNull(); } @@ -226,11 +252,10 @@ public void testSpanAttachAndRetrieve() throws Throwable { AuditManagerS3A manager = AuditIntegration.stubAuditManager(); AuditSpanS3A span = manager.createSpan("op", null, null); - GetObjectMetadataRequest request = - new GetObjectMetadataRequest("bucket", "key"); - attachSpanToRequest(request, span); - AWSAuditEventCallbacks callbacks = retrieveAttachedSpan(request); - assertThat(callbacks).isSameAs(span); + ExecutionAttributes attributes = ExecutionAttributes.builder().build(); + attachSpanToRequest(attributes, span); + AuditSpanS3A retrievedSpan = retrieveAttachedSpan(attributes); + assertThat(retrievedSpan).isSameAs(span); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java index 608667d9dfed8..af0d397e58ff2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java @@ -20,13 +20,14 @@ import java.util.List; -import com.amazonaws.handlers.RequestHandler2; import org.junit.Before; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.store.audit.AuditSpan; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditConfig; import static org.assertj.core.api.Assertions.assertThat; @@ -56,10 +57,10 @@ public void testStop() throws Throwable { } @Test - public void testCreateRequestHandlers() throws Throwable { - List<RequestHandler2> handlers - = getManager().createRequestHandlers(); - assertThat(handlers).isNotEmpty(); + public void testCreateExecutionInterceptors() throws Throwable { + List<ExecutionInterceptor> interceptors + = getManager().createExecutionInterceptors(); + assertThat(interceptors).isNotEmpty(); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java index b772e6dfc06fc..59f35b6194965 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java @@ -20,12 +20,10 @@ import java.io.IOException; import java.net.URISyntaxException; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -38,6 +36,8 @@ import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; import org.apache.hadoop.security.UserGroupInformation; +import software.amazon.awssdk.http.SdkHttpRequest; + import static org.apache.hadoop.fs.audit.AuditConstants.DELETE_KEYS_SIZE; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.REFERRER_HEADER_FILTER; @@ -97,13 +97,16 @@ protected Configuration createConfig() { public void testHttpReferrerPatchesTheRequest() throws Throwable { AuditSpan span = span(); long ts = span.getTimestamp(); - GetObjectMetadataRequest request = head(); - Map<String, String> headers - = request.getCustomRequestHeaders(); + SdkHttpRequest request = head(); + Map<String, List<String>> headers = request.headers(); assertThat(headers) .describedAs("Custom headers") .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + List<String> headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map<String, String> params = HttpReferrerAuditHeader.extractQueryParameters(header);
-import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.transfer.internal.TransferStateChangeListener; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -30,6 +27,13 @@ import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor; import org.apache.hadoop.fs.store.audit.AuditSpan; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.assertj.core.api.Assertions.assertThat; @@ -131,8 +135,23 @@ public void testLoggingSpan() throws Throwable { */ @Test public void testCopyOutsideSpanAllowed() throws Throwable { - getManager().beforeExecution(new CopyPartRequest()); - getManager().beforeExecution(new CompleteMultipartUploadRequest()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(UploadPartCopyRequest.builder().build()) + .build(), + ExecutionAttributes.builder().build()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(GetBucketLocationRequest.builder().build()) + .build(), + ExecutionAttributes.builder().build()); + getManager().beforeExecution( + InterceptorContext.builder() + .request(CompleteMultipartUploadRequest.builder() + .multipartUpload(u -> {}) + .build()) + .build(), + ExecutionAttributes.builder().build()); } /** @@ -141,9 +160,9 @@ public void testCopyOutsideSpanAllowed() throws Throwable { */ @Test public void testTransferStateListenerOutsideSpan() throws Throwable { - TransferStateChangeListener listener - = getManager().createStateChangeListener(); - listener.transferStateChanged(null, null); + TransferListener listener + = getManager().createTransferListener(); + listener.transferInitiated(null); assertHeadUnaudited(); } @@ -158,15 +177,15 @@ public void testTransferStateListenerInSpan() throws Throwable { AuditSpan span = span(); // create the listener in the span - TransferStateChangeListener listener - = getManager().createStateChangeListener(); + TransferListener listener + = getManager().createTransferListener(); span.deactivate(); // head calls fail assertHeadUnaudited(); // until the state change switches this thread back to the span - listener.transferStateChanged(null, null); + listener.transferInitiated(null); // which can be probed assertActiveSpan(span); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 9fb09b4cede52..5c638f6a9ce19 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -26,14 +26,15 @@ import java.util.List; import java.util.stream.IntStream; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException; import com.fasterxml.jackson.core.JsonProcessingException; import org.assertj.core.api.Assertions; import org.junit.Test; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.services.sts.model.StsException; + import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -140,7 +141,6 @@ private <E extends Throwable> E expectFileSystemCreateFailure( } @Test - @SuppressWarnings("deprecation") public void testCreateCredentialProvider() throws IOException { describe("Create the credential provider"); @@ -148,13 +148,12 @@ public void testCreateCredentialProvider() throws IOException { try (AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(uri, conf)) { LOG.info("Provider is {}", provider); - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); assertNotNull("Null credentials from " + provider, credentials); } } @Test - @SuppressWarnings("deprecation") public void testCreateCredentialProviderNoURI() throws IOException { describe("Create the credential provider"); @@ -162,7 +161,7 @@ public void testCreateCredentialProviderNoURI() throws IOException { try (AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(null, conf)) { LOG.info("Provider is {}", provider); - AWSCredentials credentials = provider.getCredentials(); + AwsCredentials credentials = provider.resolveCredentials(); assertNotNull("Null credentials from " + provider, credentials); } } @@ -172,7 +171,6 @@ public void testCreateCredentialProviderNoURI() throws IOException { * @return a configuration set to use to the role ARN. * @throws JsonProcessingException problems working with JSON policies. */ - @SuppressWarnings("deprecation") protected Configuration createValidRoleConf() throws JsonProcessingException { String roleARN = getAssumedRoleARN(); @@ -186,11 +184,10 @@ protected Configuration createValidRoleConf() throws JsonProcessingException { } @Test - @SuppressWarnings("deprecation") public void testAssumedInvalidRole() throws Throwable { Configuration conf = new Configuration(); conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE); - interceptClosing(AWSSecurityTokenServiceException.class, + interceptClosing(StsException.class, "", () -> new AssumedRoleCredentialProvider(uri, conf)); } @@ -204,7 +201,6 @@ public void testAssumeRoleFSBadARN() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleNoARN() throws Exception { describe("Attempt to create the FS with no ARN"); Configuration conf = createAssumedRoleConfig(); @@ -237,7 +233,6 @@ public void testAssumeRoleFSBadPolicy2() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleCannotAuthAssumedRole() throws Exception { describe("Assert that you can't use assumed roles to auth assumed roles"); @@ -251,7 +246,6 @@ public void testAssumeRoleCannotAuthAssumedRole() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleBadInnerAuth() throws Exception { describe("Try to authenticate with a keypair with spaces"); @@ -267,7 +261,6 @@ public void testAssumeRoleBadInnerAuth() throws Exception { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleBadInnerAuth2() throws Exception { describe("Try to authenticate with an invalid keypair"); @@ -351,7 +344,6 @@ private Configuration createAssumedRoleConfig(String roleARN) { } @Test - @SuppressWarnings("deprecation") public void testAssumeRoleUndefined() throws Throwable { describe("Verify that you cannot
instantiate the" + " AssumedRoleCredentialProvider without a role ARN"); @@ -363,12 +355,11 @@ public void testAssumeRoleUndefined() throws Throwable { } @Test - @SuppressWarnings("deprecation") public void testAssumedIllegalDuration() throws Throwable { describe("Expect the constructor to fail if the session is too short"); Configuration conf = new Configuration(); conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s"); - interceptClosing(AWSSecurityTokenServiceException.class, "", + interceptClosing(StsException.class, "", () -> new AssumedRoleCredentialProvider(uri, conf)); } @@ -537,7 +528,6 @@ public Path methodPath() throws IOException { * don't break. */ @Test - @SuppressWarnings("deprecation") public void testAssumedRoleRetryHandler() throws Throwable { try(AssumedRoleCredentialProvider provider = new AssumedRoleCredentialProvider(getFileSystem().getUri(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java index 37c2dce4e1d72..186887d745bfc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java @@ -146,7 +146,6 @@ public static void assertTouchForbidden(final FileSystem fs, final Path path) * @param roleARN ARN of role * @return the new configuration */ - @SuppressWarnings("deprecation") public static Configuration newAssumedRoleConfig( final Configuration srcConf, final String roleARN) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java index c5ed9dbaac429..9ad7b26cb6512 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java @@ -21,9 +21,9 @@ import java.net.URI; import java.net.URISyntaxException; -import com.amazonaws.auth.AWSCredentials; import org.junit.Before; import org.junit.Test; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; @@ -94,13 +94,13 @@ public void testMarshalledCredentialProviderSession() throws Throwable { new Configuration(false), credentials, MarshalledCredentials.CredentialTypeRequired.SessionOnly); - AWSCredentials aws = provider.getCredentials(); + AwsCredentials aws = provider.resolveCredentials(); assertEquals(credentials.toString(), credentials.getAccessKey(), - aws.getAWSAccessKeyId()); + aws.accessKeyId()); assertEquals(credentials.toString(), credentials.getSecretKey(), - aws.getAWSSecretKey()); + aws.secretAccessKey()); // because the credentials are set to full only, creation will fail } @@ -119,7 +119,7 @@ public void testCredentialTypeMismatch() throws Throwable { MarshalledCredentials.CredentialTypeRequired.FullOnly); // because the credentials are set to full only, creation will fail intercept(NoAuthWithAWSException.class, "test", - () -> provider.getCredentials()); + () -> provider.resolveCredentials()); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java index
3a7d78d68f7d5..4ee79e7220afc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java @@ -20,8 +20,8 @@ import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.fs.s3a.CredentialInitializationException; @@ -29,23 +29,18 @@ * Simple AWS credential provider which counts how often it is invoked. */ public class CountInvocationsProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { public static final String NAME = CountInvocationsProvider.class.getName(); public static final AtomicLong COUNTER = new AtomicLong(0); @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { COUNTER.incrementAndGet(); throw new CredentialInitializationException("no credentials"); } - @Override - public void refresh() { - - } - public static long getInvocationCount() { return COUNTER.get(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java index 295125169a00c..54a3a337ede25 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java @@ -26,14 +26,15 @@ import java.net.URI; import java.nio.file.AccessDeniedException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -175,7 +176,8 @@ protected Configuration createConfiguration() { conf.set(YarnConfiguration.RM_PRINCIPAL, YARN_RM); // turn on ACLs so as to verify role DT permissions include // write access. - conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); + // TODO: Why do we need this? Can we get rid of ACLs? + // conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); return conf; } @@ -254,7 +256,6 @@ public void testGetDTfromFileSystem() throws Throwable { } @Test - @SuppressWarnings("deprecation") public void testAddTokensFromFileSystem() throws Throwable { describe("verify FileSystem.addDelegationTokens() collects tokens"); S3AFileSystem fs = getFileSystem(); @@ -276,7 +277,7 @@ public void testAddTokensFromFileSystem() throws Throwable { AWSCredentialProviderList providerList = requireNonNull( delegationTokens.getCredentialProviders(), "providers"); - providerList.getCredentials(); + providerList.resolveCredentials(); } @Test @@ -323,14 +324,16 @@ protected Credentials createDelegationTokens() throws IOException { * Create a FS with a delegated token, verify it works as a filesystem, * and that you can pick up the same DT from that FS too. 
*/ - @SuppressWarnings("deprecation") @Test public void testDelegatedFileSystem() throws Throwable { describe("Delegation tokens can be passed to a new filesystem;" + " if role restricted, permissions are tightened."); S3AFileSystem fs = getFileSystem(); // force a probe of the remote FS to make sure its endpoint is valid - fs.getObjectMetadata(new Path("/")); + // TODO: Previously a call to getObjectMetadata for a base path, i.e. with an empty key, would + // return some metadata (bucket region, content type). headObject() fails without a key; check + // how this can be fixed. + // fs.getObjectMetadata(new Path("/")); readLandsatMetadata(fs); URI uri = fs.getUri(); @@ -577,8 +580,7 @@ public void testDelegationBindingMismatch2() throws Throwable { * @return result of the HEAD * @throws Exception failure */ - @SuppressWarnings("deprecation") - protected ObjectMetadata readLandsatMetadata(final S3AFileSystem delegatedFS) + protected HeadBucketResponse readLandsatMetadata(final S3AFileSystem delegatedFS) throws Exception { AWSCredentialProviderList testingCreds = delegatedFS.shareCredentials("testing"); @@ -596,10 +598,10 @@ protected ObjectMetadata readLandsatMetadata(final S3AFileSystem delegatedFS) .withMetrics(new EmptyS3AStatisticsContext() .newStatisticsFromAwsSdk()) .withUserAgentSuffix("ITestSessionDelegationInFilesystem"); - AmazonS3 s3 = factory.createS3Client(landsat, parameters); + S3Client s3 = factory.createS3ClientV2(landsat, parameters); return Invoker.once("HEAD", host, - () -> s3.getObjectMetadata(host, landsat.getPath().substring(1))); + () -> s3.headBucket(b -> b.bucket(host))); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java index fab7ffdbb76f8..629538a379638 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java @@ -22,12 +22,12 @@ import java.io.IOException; import java.net.URI; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSSessionCredentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; @@ -188,9 +188,9 @@ public void testCreateAndUseDT() throws Throwable { dt2.start(); dt2.resetTokenBindingToDT(originalDT); - final AWSSessionCredentials awsSessionCreds + final AwsSessionCredentials awsSessionCreds = verifySessionCredentials( - dt2.getCredentialProviders().getCredentials()); + dt2.getCredentialProviders().resolveCredentials()); final MarshalledCredentials origCreds = fromAWSCredentials( awsSessionCreds); @@ -249,7 +249,7 @@ public void testCreateWithRenewer() throws Throwable { * @return the retrieved DT. This is only for error reporting. * @throws IOException failure.
*/ - @SuppressWarnings({"OptionalGetWithoutIsPresent", "deprecation"}) + @SuppressWarnings({"OptionalGetWithoutIsPresent"}) protected AbstractS3ATokenIdentifier verifyCredentialPropagation( final S3AFileSystem fs, final MarshalledCredentials session, @@ -278,7 +278,7 @@ protected AbstractS3ATokenIdentifier verifyCredentialPropagation( LOG.info("Regenerated DT is {}", newDT); final MarshalledCredentials creds2 = fromAWSCredentials( verifySessionCredentials( - delegationTokens2.getCredentialProviders().getCredentials())); + delegationTokens2.getCredentialProviders().resolveCredentials())); assertEquals("Credentials", session, creds2); assertTrue("Origin in " + boundId, boundId.getOrigin() @@ -287,12 +287,12 @@ protected AbstractS3ATokenIdentifier verifyCredentialPropagation( } } - private AWSSessionCredentials verifySessionCredentials( - final AWSCredentials creds) { - AWSSessionCredentials session = (AWSSessionCredentials) creds; - assertNotNull("access key", session.getAWSAccessKeyId()); - assertNotNull("secret key", session.getAWSSecretKey()); - assertNotNull("session token", session.getSessionToken()); + private AwsSessionCredentials verifySessionCredentials( + final AwsCredentials creds) { + AwsSessionCredentials session = (AwsSessionCredentials) creds; + assertNotNull("access key", session.accessKeyId()); + assertNotNull("secret key", session.secretAccessKey()); + assertNotNull("session token", session.sessionToken()); return session; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java index 88d9ebfcdfdc3..992643ff8ce98 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java @@ -37,7 +37,6 @@ import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.SESSION_TOKEN_KIND; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java index b193cca03db00..94251431ad0a4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java @@ -718,7 +718,7 @@ private void validateContent(Path dir, private void validateStorageClass(Path dir, String expectedStorageClass) throws Exception { Path expectedFile = getPart0000(dir); S3AFileSystem fs = getFileSystem(); - String actualStorageClass = fs.getObjectMetadata(expectedFile).getStorageClass(); + String actualStorageClass = fs.getObjectMetadata(expectedFile).storageClassAsString(); Assertions.assertThat(actualStorageClass) .describedAs("Storage class of object %s", expectedFile) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index 6f2953762439a..811eacfc98f65 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -29,21 +29,6 @@ import java.util.UUID; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.MultipartUpload; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; import org.apache.hadoop.util.Lists; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.junit.AfterClass; @@ -81,6 +66,21 @@ import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.test.HadoopTestBase; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; @@ -339,7 +339,7 @@ public abstract static class JobCommitterTest // created in Before private StagingTestBase.ClientResults results = null; private StagingTestBase.ClientErrors errors = null; - private AmazonS3 mockClient = null; + private S3Client mockClient = null; @Before public void setupJob() throws Exception { @@ -448,7 +448,7 @@ protected File getTempDir() { public static class ClientResults implements Serializable { private static final long serialVersionUID = -3137637327090709905L; // For inspection of what the committer did - private final Map<String, InitiateMultipartUploadRequest> requests = + private final Map<String, CreateMultipartUploadRequest> requests = Maps.newHashMap(); private final List<String> uploads = Lists.newArrayList(); private final List<UploadPartRequest> parts = Lists.newArrayList(); @@ -461,7 +461,7 @@ public static class ClientResults implements Serializable { Maps.newHashMap(); private final List<DeleteObjectRequest> deletes = Lists.newArrayList(); - public Map<String, InitiateMultipartUploadRequest> getRequests() { + public Map<String, CreateMultipartUploadRequest> getRequests() { return requests; } @@ -490,7 +490,7 @@ public List<DeleteObjectRequest> getDeletes() { } public
List<String> getDeletePaths() { - return deletes.stream().map(DeleteObjectRequest::getKey).collect( + return deletes.stream().map(DeleteObjectRequest::key).collect( Collectors.toList()); } @@ -619,197 +619,163 @@ private static <T> T getArgumentAt(InvocationOnMock invocation, int index, * @param errors when (if any) to fail * @return the mock client to patch in to a committer/FS instance */ - public static AmazonS3 newMockS3Client(final ClientResults results, + public static S3Client newMockS3Client(final ClientResults results, final ClientErrors errors) { - AmazonS3Client mockClient = mock(AmazonS3Client.class); + S3Client mockClientV2 = mock(S3Client.class); final Object lock = new Object(); // initiateMultipartUpload - when(mockClient - .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) + when(mockClientV2 + .createMultipartUpload(any(CreateMultipartUploadRequest.class))) .thenAnswer(invocation -> { - LOG.debug("initiateMultipartUpload for {}", mockClient); + LOG.debug("initiateMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.requests.size() == errors.failOnInit) { if (errors.recover) { errors.failOnInit(-1); } - throw new AmazonClientException( - "Mock Fail on init " + results.requests.size()); + throw AwsServiceException.builder() + .message("Mock Fail on init " + results.requests.size()) + .build(); } String uploadId = UUID.randomUUID().toString(); - InitiateMultipartUploadRequest req = getArgumentAt(invocation, - 0, InitiateMultipartUploadRequest.class); + CreateMultipartUploadRequest req = getArgumentAt(invocation, + 0, CreateMultipartUploadRequest.class); results.requests.put(uploadId, req); - results.activeUploads.put(uploadId, req.getKey()); + results.activeUploads.put(uploadId, req.key()); results.uploads.add(uploadId); - return newResult(results.requests.get(uploadId), uploadId); + return CreateMultipartUploadResponse.builder() + .uploadId(uploadId) + .build(); } }); // uploadPart - when(mockClient.uploadPart(any(UploadPartRequest.class))) + when(mockClientV2.uploadPart(any(UploadPartRequest.class), any(RequestBody.class))) .thenAnswer(invocation -> { - LOG.debug("uploadPart for {}", mockClient); + LOG.debug("uploadPart for {}", mockClientV2); synchronized (lock) { if (results.parts.size() == errors.failOnUpload) { if (errors.recover) { errors.failOnUpload(-1); } LOG.info("Triggering upload failure"); - throw new AmazonClientException( - "Mock Fail on upload " + results.parts.size()); + throw AwsServiceException.builder() + .message("Mock Fail on upload " + results.parts.size()) + .build(); } UploadPartRequest req = getArgumentAt(invocation, 0, UploadPartRequest.class); results.parts.add(req); String etag = UUID.randomUUID().toString(); - List<String> etags = results.tagsByUpload.get(req.getUploadId()); + List<String> etags = results.tagsByUpload.get(req.uploadId()); if (etags == null) { etags = Lists.newArrayList(); - results.tagsByUpload.put(req.getUploadId(), etags); + results.tagsByUpload.put(req.uploadId(), etags); } etags.add(etag); - return newResult(req, etag); + return UploadPartResponse.builder().eTag(etag).build(); } }); // completeMultipartUpload - when(mockClient + when(mockClientV2 .completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) .thenAnswer(invocation -> { - LOG.debug("completeMultipartUpload for {}", mockClient); + LOG.debug("completeMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.commits.size() == errors.failOnCommit) { if (errors.recover) { errors.failOnCommit(-1); } - throw new
AmazonClientException( - "Mock Fail on commit " + results.commits.size()); + throw AwsServiceException.builder() + .message("Mock Fail on commit " + results.commits.size()) + .build(); } CompleteMultipartUploadRequest req = getArgumentAt(invocation, 0, CompleteMultipartUploadRequest.class); - String uploadId = req.getUploadId(); + String uploadId = req.uploadId(); removeUpload(results, uploadId); results.commits.add(req); - - return newResult(req); + return CompleteMultipartUploadResponse.builder().build(); } }); // abortMultipartUpload mocking doAnswer(invocation -> { - LOG.debug("abortMultipartUpload for {}", mockClient); + LOG.debug("abortMultipartUpload for {}", mockClientV2); synchronized (lock) { if (results.aborts.size() == errors.failOnAbort) { if (errors.recover) { errors.failOnAbort(-1); } - throw new AmazonClientException( - "Mock Fail on abort " + results.aborts.size()); + throw AwsServiceException.builder() + .message("Mock Fail on abort " + results.aborts.size()) + .build(); } AbortMultipartUploadRequest req = getArgumentAt(invocation, 0, AbortMultipartUploadRequest.class); - String id = req.getUploadId(); + String id = req.uploadId(); removeUpload(results, id); results.aborts.add(req); return null; } }) - .when(mockClient) + .when(mockClientV2) .abortMultipartUpload(any(AbortMultipartUploadRequest.class)); // deleteObject mocking doAnswer(invocation -> { - LOG.debug("deleteObject for {}", mockClient); + LOG.debug("deleteObject for {}", mockClientV2); synchronized (lock) { results.deletes.add(getArgumentAt(invocation, 0, DeleteObjectRequest.class)); return null; } }) - .when(mockClient) + .when(mockClientV2) .deleteObject(any(DeleteObjectRequest.class)); - // deleteObject mocking - doAnswer(invocation -> { - LOG.debug("deleteObject for {}", mockClient); - synchronized (lock) { - results.deletes.add(new DeleteObjectRequest( - getArgumentAt(invocation, 0, String.class), - getArgumentAt(invocation, 1, String.class) - )); - return null; - } - }).when(mockClient) - .deleteObject(any(String.class), any(String.class)); - // to String returns the debug information - when(mockClient.toString()).thenAnswer( + when(mockClientV2.toString()).thenAnswer( invocation -> "Mock3AClient " + results + " " + errors); - when(mockClient + when(mockClientV2 .listMultipartUploads(any(ListMultipartUploadsRequest.class))) .thenAnswer(invocation -> { synchronized (lock) { - MultipartUploadListing l = new MultipartUploadListing(); - l.setMultipartUploads( - results.activeUploads.entrySet().stream() - .map(e -> newMPU(e.getKey(), e.getValue())) - .collect(Collectors.toList())); - return l; + return ListMultipartUploadsResponse.builder() + .uploads(results.activeUploads.entrySet().stream() + .map(e -> MultipartUpload.builder() + .uploadId(e.getKey()) + .key(e.getValue()) + .build()) + .collect(Collectors.toList())) + .build(); } }); - return mockClient; + return mockClientV2; } /** * Remove an upload from the upload map. * @param results result set * @param uploadId The upload ID to remove - * @throws AmazonS3Exception with error code 404 if the id is unknown. + * @throws AwsServiceException with error code 404 if the id is unknown. 
*/ protected static void removeUpload(final ClientResults results, final String uploadId) { String removed = results.activeUploads.remove(uploadId); if (removed == null) { // upload doesn't exist - AmazonS3Exception ex = new AmazonS3Exception( - "not found " + uploadId); - ex.setStatusCode(404); - throw ex; + throw AwsServiceException.builder() + .message("not found " + uploadId) + .statusCode(404) + .build(); } } - private static CompleteMultipartUploadResult newResult( - CompleteMultipartUploadRequest req) { - return new CompleteMultipartUploadResult(); - } - - - private static MultipartUpload newMPU(String id, String path) { - MultipartUpload up = new MultipartUpload(); - up.setUploadId(id); - up.setKey(path); - return up; - } - - private static UploadPartResult newResult(UploadPartRequest request, - String etag) { - UploadPartResult result = new UploadPartResult(); - result.setPartNumber(request.getPartNumber()); - result.setETag(etag); - return result; - } - - private static InitiateMultipartUploadResult newResult( - InitiateMultipartUploadRequest request, String uploadId) { - InitiateMultipartUploadResult result = new InitiateMultipartUploadResult(); - result.setUploadId(uploadId); - return result; - } - /** * create files in the attempt path that should be found by * {@code getTaskOutput}. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java index 4d24c07dacfe2..a91f70c4077be 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java @@ -27,8 +27,6 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -import com.amazonaws.services.s3.model.PartETag; - import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.assertj.core.api.Assertions; import org.junit.AfterClass; @@ -39,6 +37,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.CompletedPart; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -159,8 +159,8 @@ private void createTasks() throws IOException { // step1: a list of tags. 
// this is the md5sum of hadoop 3.2.1.tar String tag = "9062dcf18ffaee254821303bbd11c72b"; - List<PartETag> etags = IntStream.rangeClosed(1, BLOCKS_PER_TASK + 1) - .mapToObj(i -> new PartETag(i, tag)) + List<CompletedPart> etags = IntStream.rangeClosed(1, BLOCKS_PER_TASK + 1) + .mapToObj(i -> CompletedPart.builder().partNumber(i).eTag(tag).build()) .collect(Collectors.toList()); SinglePendingCommit base = new SinglePendingCommit(); base.setBucket(BUCKET); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java index 11edf0d216376..5df2a6563db15 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java @@ -31,9 +31,6 @@ import java.util.UUID; import java.util.stream.Collectors; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; import org.apache.hadoop.util.Sets; import org.assertj.core.api.Assertions; @@ -51,7 +48,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.AWSClientIOException; import org.apache.hadoop.fs.s3a.MockS3AFileSystem; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.commit.AbstractS3ACommitter; @@ -70,6 +66,10 @@ import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.*; @@ -112,7 +112,7 @@ public class TestStagingCommitter extends StagingTestBase.MiniDFSTest { // created in Before private StagingTestBase.ClientResults results = null; private StagingTestBase.ClientErrors errors = null; - private AmazonS3 mockClient = null; + private S3Client mockClient = null; private File tmpDir; /** @@ -473,7 +473,7 @@ public void testTaskInitializeFailure() throws Exception { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept(AWSClientIOException.class, + intercept(IOException.class, "Fail on init 1", "Should fail during init", () -> committer.commitTask(tac)); @@ -501,7 +501,7 @@ public void testTaskSingleFileUploadFailure() throws Exception { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 2", "Should fail during upload", () -> { @@ -513,7 +513,7 @@ public void testTaskSingleFileUploadFailure() throws Exception { 1, results.getUploads().size()); assertEquals("Should abort the upload", results.getUploads().get(0), - results.getAborts().get(0).getUploadId()); + results.getAborts().get(0).uploadId()); assertPathDoesNotExist(fs, "Should remove the attempt path", attemptPath); } @@ -532,7 +532,7 @@ public void testTaskMultiFileUploadFailure() throws Exception {
writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 5", "Should fail during upload", () -> { @@ -564,7 +564,7 @@ public void testTaskUploadAndAbortFailure() throws Exception { writeOutputFile(tac.getTaskAttemptID(), attemptPath, UUID.randomUUID().toString(), 10); - intercept((Class) AWSClientIOException.class, + intercept(IOException.class, "Fail on upload 5", "Should suppress abort failure, propagate upload failure", ()-> { @@ -637,7 +637,7 @@ public void testJobCommitFailure() throws Exception { errors.failOnCommit(5); setMockLogLevel(MockS3AFileSystem.LOG_NAME); - intercept(AWSClientIOException.class, + intercept(IOException.class, "Fail on commit 5", "Should propagate the commit failure", () -> { @@ -645,17 +645,16 @@ public void testJobCommitFailure() throws Exception { return jobCommitter.toString(); }); - Set<String> commits = results.getCommits() .stream() .map(commit -> - "s3a://" + commit.getBucketName() + "/" + commit.getKey()) + "s3a://" + commit.bucket() + "/" + commit.key()) .collect(Collectors.toSet()); Set<String> deletes = results.getDeletes() .stream() .map(delete -> - "s3a://" + delete.getBucketName() + "/" + delete.getKey()) + "s3a://" + delete.bucket() + "/" + delete.key()) .collect(Collectors.toSet()); Assertions.assertThat(commits) @@ -729,14 +728,14 @@ private Set<String> runTasks(JobContext jobContext, private static Set<String> getAbortedIds( List<AbortMultipartUploadRequest> aborts) { return aborts.stream() - .map(AbortMultipartUploadRequest::getUploadId) + .map(AbortMultipartUploadRequest::uploadId) .collect(Collectors.toSet()); } private static Set<String> getCommittedIds( List<CompleteMultipartUploadRequest> commits) { return commits.stream() - .map(CompleteMultipartUploadRequest::getUploadId) + .map(CompleteMultipartUploadRequest::uploadId) .collect(Collectors.toSet()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java index 4e82b94314d34..2f86ae75e8666 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java @@ -24,7 +24,6 @@ import java.util.Set; import java.util.UUID; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Sets; import org.assertj.core.api.Assertions; @@ -36,6 +35,8 @@ import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.mapreduce.JobContext; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; + import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.Mockito.*; @@ -146,10 +147,10 @@ public void testAppend() throws Exception { protected void verifyFilesCreated( final PartitionedStagingCommitter committer) { Set<String> files = Sets.newHashSet(); - for (InitiateMultipartUploadRequest request : + for (CreateMultipartUploadRequest request : getMockResults().getRequests().values()) { - assertEquals(BUCKET, request.getBucketName()); - files.add(request.getKey()); + assertEquals(BUCKET, request.bucket()); + files.add(request.key()); } Assertions.assertThat(files) .describedAs("Should have the right number of
uploads") diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java index 378f4a70433d7..3a91e4026a872 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java @@ -28,7 +28,6 @@ import java.util.TreeSet; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; import org.assertj.core.api.Assertions; import org.junit.Test; import org.junit.runner.RunWith; @@ -40,6 +39,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.DurationInfo; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java index 2610f54b44e9e..ce439d89d7f4e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java @@ -23,13 +23,13 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonClientException; - import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.exception.SdkException; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; @@ -207,7 +207,7 @@ private BlockingFakeDirMarkerFS() { @Override protected void maybeCreateFakeParentDirectory(final Path path) - throws IOException, AmazonClientException { + throws IOException, SdkException { LOG.info("waking anything blocked on the signal semaphore"); // notify anything waiting signalCreatingFakeParentDirectory.release(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java index 3a390e34ecad2..f69870afe50c9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java @@ -64,6 +64,9 @@ public ITestXAttrCost() { @Test public void testXAttrRoot() throws Throwable { describe("Test xattr on root"); + // TODO: Previously a call to getObjectMetadata for a base path, ie with an empty key would + // return some metadata. (bucket region, content type). headObject() fails without a key, check + // how this can be fixed. 
Path root = new Path("/"); S3AFileSystem fs = getFileSystem(); Map xAttrs = verifyMetrics( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java index 82592b1d01950..7883fa4c83f39 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java @@ -26,11 +26,12 @@ import java.util.List; import java.util.Map; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.assertj.core.api.Assertions; import org.assertj.core.util.Lists; import org.junit.Before; import org.junit.Test; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.MockS3AFileSystem; @@ -206,20 +207,20 @@ public void testMetadataCopySkipsMagicAttribute() throws Throwable { final String owner = "x-header-owner"; final String root = "root"; CONTEXT_ACCESSORS.userHeaders.put(owner, root); - final ObjectMetadata source = CONTEXT_ACCESSORS + final HeadObjectResponse source = CONTEXT_ACCESSORS .getObjectMetadata(MAGIC_KEY); - final Map sourceUserMD = source.getUserMetadata(); + final Map sourceUserMD = source.metadata(); Assertions.assertThat(sourceUserMD.get(owner)) .describedAs("owner header in copied MD") .isEqualTo(root); - ObjectMetadata dest = new ObjectMetadata(); - headerProcessing.cloneObjectMetadata(source, dest); + Map destUserMetadata = new HashMap<>(); + headerProcessing.cloneObjectMetadata(source, destUserMetadata, CopyObjectRequest.builder()); - Assertions.assertThat(dest.getUserMetadata().get(X_HEADER_MAGIC_MARKER)) + Assertions.assertThat(destUserMetadata.get(X_HEADER_MAGIC_MARKER)) .describedAs("Magic marker header in copied MD") .isNull(); - Assertions.assertThat(dest.getUserMetadata().get(owner)) + Assertions.assertThat(destUserMetadata.get(owner)) .describedAs("owner header in copied MD") .isEqualTo(root); } @@ -307,14 +308,13 @@ public RequestFactory getRequestFactory() { } @Override - public ObjectMetadata getObjectMetadata(final String key) + public HeadObjectResponse getObjectMetadata(final String key) throws IOException { if (MAGIC_KEY.equals(key)) { - ObjectMetadata omd = new ObjectMetadata(); - omd.setUserMetadata(userHeaders); - omd.setContentLength(len); - omd.setLastModified(date); - return omd; + return HeadObjectResponse.builder() + .metadata(userHeaders) + .contentLength(len) + .lastModified(date.toInstant()).build(); } else { throw new FileNotFoundException(key); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index 7c85142d4376d..92481388e876d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -18,21 +18,21 @@ package org.apache.hadoop.fs.s3a.impl; -import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import 
com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.AwsRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ObjectCannedACL; + +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.audit.AWSRequestAnalyzer; @@ -57,7 +57,7 @@ public class TestRequestFactory extends AbstractHadoopTestBase { private final AWSRequestAnalyzer analyzer = new AWSRequestAnalyzer(); /** - * Count of requests analyzed via the {@link #a(AmazonWebServiceRequest)} + * Count of requests analyzed via the {@link #a(AwsRequest.Builder)} * call. */ private int requestsAnalyzed; @@ -81,27 +81,28 @@ public void testRequestFactoryWithEncryption() throws Throwable { */ @Test public void testRequestFactoryWithCannedACL() throws Throwable { - CannedAccessControlList acl = CannedAccessControlList.BucketOwnerFullControl; + ObjectCannedACL acl = ObjectCannedACL.BUCKET_OWNER_FULL_CONTROL; RequestFactory factory = RequestFactoryImpl.builder() .withBucket("bucket") .withCannedACL(acl) .build(); String path = "path"; String path2 = "path2"; - ObjectMetadata md = factory.newObjectMetadata(128); - Assertions.assertThat( - factory.newPutObjectRequest(path, md, - null, new ByteArrayInputStream(new byte[0])) - .getCannedAcl()) + HeadObjectResponse md = HeadObjectResponse.builder().contentLength(128L).build(); + + Assertions.assertThat(factory.newPutObjectRequestBuilder(path, null, 128, false) + .build() + .acl()) .describedAs("ACL of PUT") .isEqualTo(acl); - Assertions.assertThat(factory.newCopyObjectRequest(path, path2, md) - .getCannedAccessControlList()) + Assertions.assertThat(factory.newCopyObjectRequestBuilder(path, path2, md) + .build() + .acl()) .describedAs("ACL of COPY") .isEqualTo(acl); - Assertions.assertThat(factory.newMultipartUploadRequest(path, - null) - .getCannedACL()) + Assertions.assertThat(factory.newMultipartUploadRequestBuilder(path, null) + .build() + .acl()) .describedAs("ACL of MPU") .isEqualTo(acl); } @@ -132,21 +133,18 @@ private final class CountRequests private final AtomicLong counter = new AtomicLong(); @Override - public <T extends AmazonWebServiceRequest> T prepareRequest(final T t) { + public void prepareRequest(final SdkRequest.Builder t) { counter.addAndGet(1); - return t; } } /** * Analyze the request, log the output, return the info. - * @param request request. - * @param <T> type of request.
* @return value */ - private <T extends AmazonWebServiceRequest> AWSRequestAnalyzer.RequestInfo - a(T request) { - AWSRequestAnalyzer.RequestInfo info = analyzer.analyze(request); + private AWSRequestAnalyzer.RequestInfo a(AwsRequest.Builder builder) { + AWSRequestAnalyzer.RequestInfo info = analyzer.analyze(builder.build()); LOG.info("{}", info); requestsAnalyzed++; return info; @@ -160,27 +158,25 @@ private void createFactoryObjects(RequestFactory factory) throws IOException { String path = "path"; String path2 = "path2"; String id = "1"; - ObjectMetadata md = factory.newObjectMetadata(128); - a(factory.newAbortMultipartUploadRequest(path, id)); - a(factory.newCompleteMultipartUploadRequest(path, id, + a(factory.newAbortMultipartUploadRequestBuilder(path, id)); + a(factory.newCompleteMultipartUploadRequestBuilder(path, id, new ArrayList<>())); - a(factory.newCopyObjectRequest(path, path2, md)); - a(factory.newDeleteObjectRequest(path)); - a(factory.newBulkDeleteRequest(new ArrayList<>())); + a(factory.newCopyObjectRequestBuilder(path, path2, + HeadObjectResponse.builder().build())); + a(factory.newDeleteObjectRequestBuilder(path)); + a(factory.newBulkDeleteRequestBuilder(new ArrayList<>())); a(factory.newDirectoryMarkerRequest(path)); - a(factory.newGetObjectRequest(path)); - a(factory.newGetObjectMetadataRequest(path)); - a(factory.newListMultipartUploadsRequest(path)); - a(factory.newListObjectsV1Request(path, "/", 1)); - a(factory.newListNextBatchOfObjectsRequest(new ObjectListing())); - a(factory.newListObjectsV2Request(path, "/", 1)); - a(factory.newMultipartUploadRequest(path, null)); - File srcfile = new File("/tmp/a"); - a(factory.newPutObjectRequest(path, - factory.newObjectMetadata(-1), null, srcfile)); - ByteArrayInputStream stream = new ByteArrayInputStream(new byte[0]); - a(factory.newPutObjectRequest(path, md, null, stream)); - a(factory.newSelectRequest(path)); + a(factory.newGetObjectRequestBuilder(path)); + a(factory.newHeadObjectRequestBuilder(path)); + a(factory.newListMultipartUploadsRequestBuilder(path)); + a(factory.newListObjectsV1RequestBuilder(path, "/", 1)); + a(factory.newListObjectsV2RequestBuilder(path, "/", 1)); + a(factory.newMultipartUploadRequestBuilder(path, null)); + a(factory.newPutObjectRequestBuilder(path, + PutObjectOptions.keepingDirs(), -1, true)); + a(factory.newPutObjectRequestBuilder(path, + PutObjectOptions.deletingDirs(), 1024, false)); + a(factory.newSelectRequestBuilder(path)); } /** @@ -193,25 +189,18 @@ public void testMultipartUploadRequest() throws Throwable { RequestFactory factory = RequestFactoryImpl.builder() .withBucket("bucket") .withRequestPreparer(countRequests) + .withMultipartPartCountLimit(2) .build(); String path = "path"; - String path2 = "path2"; String id = "1"; - File srcfile = File.createTempFile("file", ""); - try { - ByteArrayInputStream stream = new ByteArrayInputStream(new byte[0]); - - a(factory.newUploadPartRequest(path, id, 1, 0, stream, null, 0)); - a(factory.newUploadPartRequest(path, id, 2, 128_000_000, - null, srcfile, 0)); - // offset is past the EOF - intercept(IllegalArgumentException.class, () -> - factory.newUploadPartRequest(path, id, 3, 128_000_000, - null, srcfile, 128)); - } finally { - srcfile.delete(); - } + + a(factory.newUploadPartRequestBuilder(path, id, 1, 0)); + a(factory.newUploadPartRequestBuilder(path, id, 2, 128_000_000)); + // partNumber is past the limit + intercept(PathIOException.class, () -> + factory.newUploadPartRequestBuilder(path, id, 3, 128_000_000)); + assertThat(countRequests.counter.get()) .describedAs("request
preparation count") .isEqualTo(requestsAnalyzed); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java index 33a44a9ad78f7..85970a65887f4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java @@ -19,13 +19,15 @@ package org.apache.hadoop.fs.s3a.impl; import java.io.IOException; +import java.io.InputStream; -import com.amazonaws.internal.SdkFilterInputStream; import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.hadoop.test.HadoopTestBase; +import software.amazon.awssdk.http.Abortable; + import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_INPUT_STREAM_STATISTICS; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -127,7 +129,6 @@ public void testStreamUnderflow() throws Throwable { public void testReadFailure() throws Throwable { int threshold = 50; SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, new FakeSDKInputStream(BYTES, threshold), false, BYTES, @@ -145,7 +146,6 @@ public void testReadFailure() throws Throwable { public void testReadFailureDoesNotSurfaceInAbort() throws Throwable { int threshold = 50; SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, new FakeSDKInputStream(BYTES, threshold), true, BYTES, @@ -183,7 +183,6 @@ private SDKStreamDrainer drainer(int remaining, boolean shouldAbort, FakeSDKInputStream in) throws Throwable { SDKStreamDrainer drainer = new SDKStreamDrainer("s3://example/", - null, in, shouldAbort, remaining, @@ -246,7 +245,8 @@ private static SDKStreamDrainer assertBytesRead(final SDKStreamDrainer drainer, * Fake stream; generates data dynamically. * Only overrides the methods used in stream draining. 
*/ - private static final class FakeSDKInputStream extends SdkFilterInputStream { + private static final class FakeSDKInputStream extends InputStream + implements Abortable { private final int capacity; @@ -264,7 +264,6 @@ private static final class FakeSDKInputStream extends SdkFilterInputStream { * @param readToRaiseIOE position to raise an IOE, or -1 */ private FakeSDKInputStream(final int capacity, final int readToRaiseIOE) { - super(null); this.capacity = capacity; this.readToRaiseIOE = readToRaiseIOE; } @@ -282,11 +281,6 @@ public void abort() { aborted = true; } - @Override - protected boolean isAborted() { - return aborted; - } - @Override public int read() throws IOException { if (bytesRead >= capacity) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index ccb0c0e79ec7e..ce066b0b3ecc7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -27,9 +27,6 @@ import java.util.concurrent.Callable; import java.util.stream.Collectors; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -48,6 +45,11 @@ import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.store.audit.AuditSpan; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_DELETE; @@ -156,7 +158,7 @@ public static Collection params() { /** * S3 Client of the FS. */ - private AmazonS3 s3client; + private S3Client s3client; /** * Path to a file under the marker. @@ -212,7 +214,7 @@ protected Configuration createConfiguration() { public void setup() throws Exception { super.setup(); S3AFileSystem fs = getFileSystem(); - s3client = fs.getAmazonS3ClientForTesting("markers"); + s3client = fs.getAmazonS3V2ClientForTesting("markers"); bucket = fs.getBucket(); Path base = new Path(methodPath(), "base"); @@ -604,7 +606,8 @@ private void assertTestObjectsExist() throws Exception { */ private void put(final String key, final String content) throws Exception { exec("PUT " + key, () -> - s3client.putObject(bucket, key, content)); + s3client.putObject(b -> b.bucket(bucket).key(key), + RequestBody.fromString(content))); } /** * Delete an object. @@ -613,7 +616,7 @@ private void put(final String key, final String content) throws Exception { */ private void deleteObject(final String key) throws Exception { exec("DELETE " + key, () -> { - s3client.deleteObject(bucket, key); + s3client.deleteObject(b -> b.bucket(bucket).key(key)); return "deleted " + key; }); } @@ -624,10 +627,10 @@ private void deleteObject(final String key) throws Exception { * @return a description of the object. 
*/ private String head(final String key) throws Exception { - ObjectMetadata md = exec("HEAD " + key, () -> - s3client.getObjectMetadata(bucket, key)); + HeadObjectResponse response = exec("HEAD " + key, () -> + s3client.headObject(b -> b.bucket(bucket).key(key))); return String.format("Object %s of length %d", - key, md.getInstanceLength()); + key, response.contentLength()); } /** @@ -655,7 +658,7 @@ private T exec(String op, Callable call) throws Exception { ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); try (AuditSpan span = getSpanSource().createSpan(op, null, null)) { return call.call(); - } catch (AmazonClientException ex) { + } catch (SdkException ex) { throw S3AUtils.translateException(op, "", ex); } finally { timer.end(op); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java index 6e2f547a22ec1..0c1d402305c0e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java @@ -21,17 +21,18 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStream; import java.util.concurrent.CompletableFuture; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; - import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + /** * A mock s3 file with some fault injection. 
 */
@@ -55,7 +56,7 @@ class MockS3ARemoteObject extends S3ARemoteObject {
     super(
         S3APrefetchFakes.createReadContext(null, KEY, size, 1, 1),
         S3APrefetchFakes.createObjectAttributes(BUCKET, KEY, size),
-        S3APrefetchFakes.createInputStreamCallbacks(BUCKET, KEY),
+        S3APrefetchFakes.createInputStreamCallbacks(BUCKET),
         EmptyS3AStatisticsContext.EMPTY_INPUT_STREAM_STATISTICS,
         S3APrefetchFakes.createChangeTracker(BUCKET, KEY, size)
     );
@@ -68,7 +69,8 @@ class MockS3ARemoteObject extends S3ARemoteObject {
   }

   @Override
-  public InputStream openForRead(long offset, int size) throws IOException {
+  public ResponseInputStream<GetObjectResponse> openForRead(long offset, int size)
+      throws IOException {
     Validate.checkLessOrEqual(offset, "offset", size(), "size()");
     Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset");
@@ -77,11 +79,15 @@ public InputStream openForRead(long offset, int size) throws IOException {
       throw new IOException("Throwing because throwExceptionOnOpen is true ");
     }
     int bufSize = (int) Math.min(size, size() - offset);
-    return new ByteArrayInputStream(contents, (int) offset, bufSize);
+    GetObjectResponse objectResponse = GetObjectResponse.builder().build();
+    return new ResponseInputStream(objectResponse,
+        AbortableInputStream.create(new ByteArrayInputStream(contents,
+            (int) offset, bufSize), () -> {}));
   }

   @Override
-  public void close(InputStream inputStream, int numRemainingBytes) {
+  public void close(ResponseInputStream<GetObjectResponse> inputStream,
+      int numRemainingBytes) {
     // do nothing since we do not use a real S3 stream.
   }
@@ -92,7 +98,8 @@ public static byte byteAtOffset(int offset) {
   public static S3AInputStream.InputStreamCallbacks createClient(String bucketName) {
     return new S3AInputStream.InputStreamCallbacks() {
       @Override
-      public S3Object getObject(GetObjectRequest request) {
+      public ResponseInputStream<GetObjectResponse> getObject(
+          GetObjectRequest request) {
         return null;
       }
@@ -102,8 +109,8 @@ public <T> CompletableFuture<T> submit(CallableRaisingIOE<T> operation) {
       }

       @Override
-      public GetObjectRequest newGetRequest(String key) {
-        return new GetObjectRequest(bucketName, key);
+      public GetObjectRequest.Builder newGetRequestBuilder(String key) {
+        return GetObjectRequest.builder().bucket(bucketName).key(key);
       }

       @Override
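Both this fake and S3APrefetchFakes below build on the same construction: wrapping an in-memory buffer as the stream type the v2 SDK returns from getObject(). A condensed, self-contained sketch of that pattern may help; the class and method names here are illustrative, not part of the patch:

    import java.io.ByteArrayInputStream;

    import software.amazon.awssdk.core.ResponseInputStream;
    import software.amazon.awssdk.http.AbortableInputStream;
    import software.amazon.awssdk.services.s3.model.GetObjectResponse;

    final class FakeStreamSketch {
      // Pair response metadata with an abortable stream, as the v2 SDK does:
      // ResponseInputStream<GetObjectResponse> replaces the v1
      // S3Object/S3ObjectInputStream pair.
      static ResponseInputStream<GetObjectResponse> wrap(byte[] data) {
        GetObjectResponse response = GetObjectResponse.builder()
            .contentLength((long) data.length)
            .build();
        // The Abortable callback is a no-op lambda here; a real client
        // would use it to cancel the underlying HTTP request.
        return new ResponseInputStream<>(response,
            AbortableInputStream.create(new ByteArrayInputStream(data), () -> {}));
      }
    }
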
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java
index cf6aa7ba1aa89..cb01387b21736 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java
@@ -31,11 +31,6 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;

-import com.amazonaws.services.s3.model.GetObjectRequest;
-import com.amazonaws.services.s3.model.ObjectMetadata;
-import com.amazonaws.services.s3.model.S3Object;
-import com.amazonaws.services.s3.model.S3ObjectInputStream;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalDirAllocator;
@@ -62,6 +57,11 @@
 import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.util.functional.CallableRaisingIOE;

+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.http.AbortableInputStream;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectResponse;
+
 import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR;
 import static org.apache.hadoop.fs.s3a.Constants.HADOOP_TMP_DIR;
 import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.emptyStatisticsStore;
@@ -175,32 +175,26 @@ public static ChangeTracker createChangeTracker(
         createObjectAttributes(bucket, key, fileSize));
   }

-  public static S3ObjectInputStream createS3ObjectInputStream(byte[] buffer) {
-    return new S3ObjectInputStream(new ByteArrayInputStream(buffer), null);
+  public static ResponseInputStream<GetObjectResponse> createS3ObjectInputStream(
+      GetObjectResponse objectResponse, byte[] buffer) {
+    return new ResponseInputStream(objectResponse,
+        AbortableInputStream.create(new ByteArrayInputStream(buffer), () -> {}));
   }

   public static S3AInputStream.InputStreamCallbacks createInputStreamCallbacks(
-      String bucket,
-      String key) {
+      String bucket) {

-    S3Object object = new S3Object() {
-      @Override
-      public S3ObjectInputStream getObjectContent() {
-        return createS3ObjectInputStream(new byte[8]);
-      }
+    GetObjectResponse objectResponse = GetObjectResponse.builder()
+        .eTag(E_TAG)
+        .build();

-      @Override
-      public ObjectMetadata getObjectMetadata() {
-        ObjectMetadata metadata = new ObjectMetadata();
-        metadata.setHeader("ETag", E_TAG);
-        return metadata;
-      }
-    };
+    ResponseInputStream<GetObjectResponse> responseInputStream =
+        createS3ObjectInputStream(objectResponse, new byte[8]);

     return new S3AInputStream.InputStreamCallbacks() {
       @Override
-      public S3Object getObject(GetObjectRequest request) {
-        return object;
+      public ResponseInputStream<GetObjectResponse> getObject(GetObjectRequest request) {
+        return responseInputStream;
       }

       @Override
@@ -209,8 +203,8 @@ public <T> CompletableFuture<T> submit(CallableRaisingIOE<T> operation) {
       }

       @Override
-      public GetObjectRequest newGetRequest(String key) {
-        return new GetObjectRequest(bucket, key);
+      public GetObjectRequest.Builder newGetRequestBuilder(String key) {
+        return GetObjectRequest.builder().bucket(bucket).key(key);
       }

       @Override
@@ -229,9 +223,6 @@ public static S3ARemoteInputStream createInputStream(
       int prefetchBlockSize,
       int prefetchBlockCount) {

-    org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(key);
-
-    S3AFileStatus fileStatus = createFileStatus(key, fileSize);
     S3ObjectAttributes s3ObjectAttributes = createObjectAttributes(
         bucket, key, fileSize);
     S3AReadOpContext s3AReadOpContext = createReadContext(
@@ -242,7 +233,7 @@ public static S3ARemoteInputStream createInputStream(
         prefetchBlockCount);

     S3AInputStream.InputStreamCallbacks callbacks =
-        createInputStreamCallbacks(bucket, key);
+        createInputStreamCallbacks(bucket);

     S3AInputStreamStatistics stats =
         s3AReadOpContext.getS3AStatisticsContext().newInputStreamStatistics();
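The load-test changes that follow replace the v1 DeleteObjectsRequest.KeyVersion list with the v2 ObjectIdentifier list. A minimal side-by-side sketch of that mapping; the helper class and method names are illustrative, not from the patch:

    import java.util.List;
    import java.util.stream.Collectors;

    import com.amazonaws.services.s3.model.DeleteObjectsRequest;
    import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

    final class KeyMappingSketch {
      // v1: keys wrapped in mutable KeyVersion objects via a constructor.
      static List<DeleteObjectsRequest.KeyVersion> toV1(List<String> keys) {
        return keys.stream()
            .map(DeleteObjectsRequest.KeyVersion::new)
            .collect(Collectors.toList());
      }

      // v2: keys become immutable ObjectIdentifier values built through builders.
      static List<ObjectIdentifier> toV2(List<String> keys) {
        return keys.stream()
            .map(k -> ObjectIdentifier.builder().key(k).build())
            .collect(Collectors.toList());
      }
    }

Both SDKs coexist on the classpath during the migration, which is what makes this file-by-file conversion possible.
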
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java
index 813eea8389f64..7c3398ce561d7 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java
@@ -29,7 +29,6 @@
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.ExecutorService;

-import com.amazonaws.services.s3.model.DeleteObjectsRequest;
 import org.apache.hadoop.util.Preconditions;
 import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.assertj.core.api.Assertions;
@@ -52,6 +51,8 @@
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.concurrent.HadoopExecutors;

+import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
+
 import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
 import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE;
 import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE_DEFAULT;
@@ -228,7 +229,7 @@ private File deleteFiles(final int requestCount,
     Path basePath = path("testDeleteObjectThrottling");
     final S3AFileSystem fs = getFileSystem();
     final String base = fs.pathToKey(basePath);
-    final List<DeleteObjectsRequest.KeyVersion> fileList
+    final List<ObjectIdentifier> fileList
         = buildDeleteRequest(base, entries);
     final FileWriter out = new FileWriter(csvFile);
     Csvout csvout = new Csvout(out, "\t", "\n");
@@ -304,12 +305,12 @@ private File deleteFiles(final int requestCount,
   }

-  private List<DeleteObjectsRequest.KeyVersion> buildDeleteRequest(
+  private List<ObjectIdentifier> buildDeleteRequest(
       String base, int count) {
-    List<DeleteObjectsRequest.KeyVersion> request = new ArrayList<>(count);
+    List<ObjectIdentifier> request = new ArrayList<>(count);
     for (int i = 0; i < count; i++) {
-      request.add(new DeleteObjectsRequest.KeyVersion(
-          String.format("%s/file-%04d", base, i)));
+      request.add(ObjectIdentifier.builder().key(
+          String.format("%s/file-%04d", base, i)).build());
     }
     return request;
   }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
index de903b3d75a57..2d380a9aef6d0 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3ADataBlocks;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.s3a.S3ATestUtils;
 import org.apache.hadoop.fs.s3a.Statistic;
@@ -41,6 +42,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
+import software.amazon.awssdk.services.s3.model.PutObjectResponse;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
@@ -49,10 +53,6 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;

-import com.amazonaws.services.s3.model.ObjectMetadata;
-import com.amazonaws.services.s3.model.PutObjectRequest;
-import com.amazonaws.services.s3.model.PutObjectResult;
-
 import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY;
 import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_KEEP;
 import static org.apache.hadoop.fs.s3a.Statistic.*;
@@ -249,18 +249,19 @@ public void testMultiPagesListingPerformanceAndCorrectness()
             = fs.getWriteOperationHelper();
     final RequestFactory requestFactory
         = writeOperationHelper.getRequestFactory();
-    List<CompletableFuture<PutObjectResult>> futures =
+    List<CompletableFuture<PutObjectResponse>> futures =
         new ArrayList<>(numOfPutRequests);

     for (int i=0; i<numOfPutRequests; i++) {
-          writeOperationHelper.putObject(put, PutObjectOptions.keepingDirs(), null)));
+      PutObjectRequest.Builder putObjectRequestBuilder = requestFactory
+          .newPutObjectRequestBuilder(fs.pathToKey(file),
+              null, 128, false);
+      futures.add(submit(executorService,
+          () -> writeOperationHelper.putObject(putObjectRequestBuilder.build(),
+              PutObjectOptions.keepingDirs(),
+              new
S3ADataBlocks.BlockUploadData(new FailingInputStream()), false, null))); } LOG.info("Waiting for PUTs to complete"); waitForCompletion(futures); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java index 99407467df56d..006c989604fd7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java @@ -126,7 +126,7 @@ private void skipQuietly(String text) { protected void assertStorageClass(Path hugeFile) throws IOException { S3AFileSystem fs = getFileSystem(); - String actual = fs.getObjectMetadata(hugeFile).getStorageClass(); + String actual = fs.getObjectMetadata(hugeFile).storageClassAsString(); assertTrue( "Storage class of object is " + actual + ", expected " + STORAGE_CLASS_REDUCED_REDUNDANCY, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java index 2c1a10a21d0ab..a3d41116182e5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java @@ -263,9 +263,9 @@ private static CsvFile writeStandardHeader(final CsvFile csv, protected static AWSServiceIOException verifyErrorCode(final String code, final AWSServiceIOException ex) { logIntercepted(ex); - if (!code.equals(ex.getErrorCode())) { + if (!code.equals(ex.awsErrorDetails().errorCode())) { throw new AssertionError("Expected Error code" + code - + " actual " + ex.getErrorCode(), + + " actual " + ex.awsErrorDetails().errorCode(), ex); } return ex; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java index 77c7736575c39..fdf5eb53e187e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/ExtraAssertions.java @@ -147,7 +147,7 @@ public static T extractCause(Class expected, */ protected void assertStatusCode(AWSServiceIOException e, int code) throws AWSServiceIOException { - if (e.getStatusCode() != code) { + if (e.statusCode() != code) { throw e; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java index fa1ad2db62af7..0d74ff52f46f2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java @@ -21,14 +21,14 @@ import java.io.IOException; import java.util.List; -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.transfer.model.CopyResult; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.CopyObjectResponse; +import 
software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AReadOpContext; @@ -88,7 +88,7 @@ public RemoteIterator listFilesAndDirectoryMarkers( } @Override - public CopyResult copyFile( + public CopyObjectResponse copyFile( String srcKey, String destKey, S3ObjectAttributes srcAttributes, @@ -99,9 +99,9 @@ public CopyResult copyFile( @Override public void removeKeys( - List keysToDelete, + List keysToDelete, boolean deleteFakeDir) - throws MultiObjectDeleteException, AmazonClientException, + throws MultiObjectDeleteException, AwsServiceException, IOException { } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java index ffba558d11fd0..eee0c71950566 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalWriteOperationHelperCallbacks.java @@ -18,10 +18,12 @@ package org.apache.hadoop.fs.s3a.test; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.SelectObjectContentRequest; -import com.amazonaws.services.s3.model.SelectObjectContentResult; +import java.util.concurrent.CompletableFuture; + +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import org.apache.hadoop.fs.s3a.WriteOperationHelper; @@ -32,12 +34,14 @@ public class MinimalWriteOperationHelperCallbacks implements WriteOperationHelper.WriteOperationHelperCallbacks { @Override - public SelectObjectContentResult selectObjectContent(SelectObjectContentRequest request) { + public CompletableFuture selectObjectContent( + SelectObjectContentRequest request, + SelectObjectContentResponseHandler th) { return null; } @Override - public CompleteMultipartUploadResult completeMultipartUpload( + public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { return null; } diff --git a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 0000000000000..3b308f19255c3 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1,13 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mock-maker-inline \ No newline at end of file From 68510c8ab12226488bdd457aa5162d9eb034e13b Mon Sep 17 00:00:00 2001 From: ahmarsuhail Date: Mon, 28 Nov 2022 16:23:16 +0000 Subject: [PATCH 02/13] HADOOP-18073. Address review comments. (#31) addresses review comments + yetus errors Co-authored-by: Ahmar Suhail --- .../hadoop/fs/s3a/AWSClientIOException.java | 2 +- .../fs/s3a/AWSCredentialProviderList.java | 4 +- .../hadoop/fs/s3a/AWSServiceIOException.java | 4 +- .../CredentialInitializationException.java | 4 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 24 ++--- .../fs/s3a/InconsistentS3ClientFactory.java | 2 +- .../org/apache/hadoop/fs/s3a/Invoker.java | 4 +- .../org/apache/hadoop/fs/s3a/Listing.java | 4 +- .../fs/s3a/MultiObjectDeleteException.java | 9 +- .../apache/hadoop/fs/s3a/MultipartUtils.java | 6 +- .../fs/s3a/ProgressableProgressListener.java | 5 +- .../hadoop/fs/s3a/S3ABlockOutputStream.java | 15 ++- .../apache/hadoop/fs/s3a/S3ADataBlocks.java | 6 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 72 +++++++------- .../apache/hadoop/fs/s3a/S3AInputStream.java | 6 +- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 4 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 18 ++-- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 8 +- .../hadoop/fs/s3a/WriteOperationHelper.java | 9 +- .../V1ToV2AwsCredentialProviderAdapter.java | 1 - .../hadoop/fs/s3a/api/RequestFactory.java | 10 +- .../hadoop/fs/s3a/audit/AuditIntegration.java | 2 +- .../hadoop/fs/s3a/audit/AuditManagerS3A.java | 5 +- .../s3a/audit/impl/ActiveAuditManagerS3A.java | 30 +++--- .../s3a/audit/impl/NoopAuditManagerS3A.java | 5 +- .../AbstractSessionCredentialsProvider.java | 4 +- .../auth/AssumedRoleCredentialProvider.java | 8 +- .../s3a/auth/MarshalledCredentialBinding.java | 12 ++- .../hadoop/fs/s3a/auth/STSClientFactory.java | 40 +++----- .../EncryptionSecretOperations.java | 2 +- .../s3a/auth/delegation/RoleTokenBinding.java | 2 +- .../auth/delegation/S3ADelegationTokens.java | 1 - .../auth/delegation/SessionTokenBinding.java | 6 +- .../fs/s3a/commit/AbstractS3ACommitter.java | 2 +- .../s3a/commit/files/SinglePendingCommit.java | 2 +- .../fs/s3a/commit/impl/CommitOperations.java | 6 +- .../s3a/commit/magic/MagicCommitTracker.java | 4 +- .../fs/s3a/{ => impl}/AWSCannedACL.java | 6 +- .../fs/s3a/{ => impl}/AWSClientConfig.java | 5 +- .../apache/hadoop/fs/s3a/impl/AWSHeaders.java | 98 +++++++++++++++++++ .../fs/s3a/impl/BulkDeleteRetryHandler.java | 6 +- .../fs/s3a/impl/ChangeDetectionPolicy.java | 10 +- .../hadoop/fs/s3a/impl/ChangeTracker.java | 7 +- .../hadoop/fs/s3a/impl/DeleteOperation.java | 2 +- .../hadoop/fs/s3a/impl/HeaderProcessing.java | 43 ++++---- .../hadoop/fs/s3a/impl/RenameOperation.java | 5 +- .../fs/s3a/impl/RequestFactoryImpl.java | 53 +++++----- .../hadoop/fs/s3a/impl/SDKStreamDrainer.java | 2 +- .../fs/s3a/prefetch/S3ARemoteObject.java | 6 +- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 4 +- .../fs/s3a/select/BlockingEnumeration.java | 8 +- .../hadoop/fs/s3a/select/SelectBinding.java | 15 ++- .../fs/s3a/select/SelectInputStream.java | 5 +- .../s3a/select/SelectObjectContentHelper.java | 9 +- 
.../impl/AwsStatisticsCollector.java | 40 ++++---- .../hadoop/fs/s3a/tools/MarkerTool.java | 6 +- .../fs/s3a/tools/MarkerToolOperations.java | 5 +- .../s3a/tools/MarkerToolOperationsImpl.java | 5 +- .../tools/hadoop-aws/aws_sdk_v2_changelog.md | 78 +++++++-------- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 13 +-- .../s3a/ITestS3AAWSCredentialsProvider.java | 4 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 8 +- .../s3a/ITestS3AClientSideEncryptionKms.java | 6 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 8 +- .../fs/s3a/ITestS3AFailureHandling.java | 5 +- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 8 +- .../hadoop/fs/s3a/ITestS3AMultipartUtils.java | 2 +- .../fs/s3a/ITestS3ATemporaryCredentials.java | 6 +- .../hadoop/fs/s3a/MockS3AFileSystem.java | 6 +- .../hadoop/fs/s3a/MultipartTestUtils.java | 9 +- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 4 +- .../apache/hadoop/fs/s3a/TestArnResource.java | 2 +- .../org/apache/hadoop/fs/s3a/TestInvoker.java | 8 +- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 8 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 7 +- .../fs/s3a/TestS3AExceptionTranslation.java | 9 +- .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 38 +++---- .../fs/s3a/TestS3AInputStreamRetry.java | 14 +-- .../apache/hadoop/fs/s3a/TestS3AUnbuffer.java | 15 +-- .../fs/s3a/TestStreamChangeTracker.java | 8 +- .../fs/s3a/audit/AbstractAuditingTest.java | 13 ++- .../fs/s3a/audit/TestAuditIntegration.java | 13 ++- .../fs/s3a/audit/TestAuditSpanLifecycle.java | 2 +- .../audit/TestHttpReferrerAuditHeader.java | 2 +- .../fs/s3a/audit/TestLoggingAuditor.java | 12 +-- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 4 +- .../s3a/auth/TestMarshalledCredentials.java | 2 +- .../ITestSessionDelegationInFilesystem.java | 6 +- .../ITestSessionDelegationTokens.java | 4 +- .../s3a/commit/staging/StagingTestBase.java | 28 +++--- .../staging/TestDirectoryCommitterScale.java | 3 +- .../commit/staging/TestStagingCommitter.java | 6 +- .../TestStagingPartitionedTaskCommit.java | 2 +- .../fs/s3a/impl/ITestRenameDeleteRace.java | 3 +- .../hadoop/fs/s3a/impl/ITestXAttrCost.java | 2 +- .../fs/s3a/impl/TestHeaderProcessing.java | 4 +- .../fs/s3a/impl/TestRequestFactory.java | 8 +- .../fs/s3a/impl/TestSDKStreamDrainer.java | 2 +- .../ITestDirectoryMarkerListing.java | 8 +- .../fs/s3a/prefetch/MockS3ARemoteObject.java | 10 +- .../fs/s3a/prefetch/S3APrefetchFakes.java | 9 +- .../ILoadTestS3ABulkDeleteThrottling.java | 2 +- .../scale/ITestS3ADirectoryPerformance.java | 5 +- 103 files changed, 600 insertions(+), 504 deletions(-) rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/{ => impl}/AWSCannedACL.java (92%) rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/{ => impl}/AWSClientConfig.java (98%) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java index 377ffe9b7b56d..b8c65d48c8906 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java @@ -18,11 +18,11 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.core.exception.SdkException; import org.apache.hadoop.util.Preconditions; import java.io.IOException; -import software.amazon.awssdk.core.exception.SdkException; /** * 
IOException equivalent of an {@link SdkException}. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 228a9b8bd4667..5e98b99966b78 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -29,12 +29,12 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.BasicSessionCredentials; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.util.Preconditions; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java index 49bb8ec09700d..72d1095188075 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSServiceIOException.java @@ -18,11 +18,11 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import software.amazon.awssdk.awscore.exception.AwsErrorDetails; -import software.amazon.awssdk.awscore.exception.AwsServiceException; /** * A specific exception from AWS operations. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java index 92e8c99bb2962..ff6dc6a60379e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/CredentialInitializationException.java @@ -50,5 +50,7 @@ public CredentialInitializationException(String message) { * @return false, always. 
 */
   @Override
-  public boolean retryable() { return false; }
+  public boolean retryable() {
+    return false;
+  }
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
index af80beae66d09..64c62f8876f93 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java
@@ -43,6 +43,7 @@
 import com.amazonaws.util.AwsHostNameUtils;
 import com.amazonaws.util.RuntimeHttpUtils;

+import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
 import org.apache.hadoop.util.Preconditions;
 import org.apache.hadoop.classification.VisibleForTesting;
 import org.slf4j.Logger;
@@ -72,7 +73,7 @@
 import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
 import org.apache.hadoop.fs.store.LogExactlyOnce;

-import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER;
+import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER;
@@ -227,25 +228,20 @@ public S3AsyncClient createS3AsyncClient(
  * @param <BuilderT> S3 client builder type
  * @param <ClientT> S3 client type
  */
-  private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT>
-      BuilderT configureClientBuilder(
-      BuilderT builder,
-      S3ClientCreationParameters parameters,
-      Configuration conf,
-      String bucket) {
+  private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> BuilderT configureClientBuilder(
+      BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket) {

     URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
-    Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket,
-        parameters.getCredentialSet());
+    Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket, parameters.getCredentialSet());
     LOG.debug("Using endpoint {}; and region {}", endpoint, region);

     // TODO: Some configuration done in configureBasicParams is not done yet.
     S3Configuration serviceConfiguration = S3Configuration.builder()
-      .pathStyleAccessEnabled(parameters.isPathStyleAccess())
-      // TODO: Review. Currently required to pass access point tests in ITestS3ABucketExistence,
-      // but resolving the region from the ap may be the correct solution.
-      .useArnRegionEnabled(true)
-      .build();
+        .pathStyleAccessEnabled(parameters.isPathStyleAccess())
+        // TODO: Review. Currently required to pass access point tests in ITestS3ABucketExistence,
+        // but resolving the region from the ap may be the correct solution.
+ .useArnRegionEnabled(true) + .build(); return builder .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index c1f6bd6f1fd1a..e9946e7e85c34 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -64,7 +64,7 @@ private static class FailureInjectionInterceptor implements ExecutionInterceptor */ private final AtomicLong failureCounter = new AtomicLong(0); - public FailureInjectionInterceptor(FailureInjectionPolicy policy) { + FailureInjectionInterceptor(FailureInjectionPolicy policy) { this.policy = policy; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 58e65530c235e..ec232728eee94 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -24,6 +24,7 @@ import java.util.concurrent.Future; import javax.annotation.Nullable; +import software.amazon.awssdk.core.exception.SdkException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,7 +38,6 @@ import org.apache.hadoop.util.functional.InvocationRaisingIOE; import org.apache.hadoop.util.Preconditions; -import software.amazon.awssdk.core.exception.SdkException; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration; @@ -444,7 +444,7 @@ public T retryUntranslated( * @param operation operation to execute * @return the result of the call * @throws IOException any IOE raised - * @throws SdkBaseException any AWS exception raised + * @throws SdkException any AWS exception raised * @throws RuntimeException : these are never caught and retries. 
*/ @Retries.RetryRaw diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index b4674159ea473..490deaaab04d9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.S3Object; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.VisibleForTesting; @@ -38,8 +40,6 @@ import org.apache.hadoop.util.functional.RemoteIterators; import org.slf4j.Logger; -import software.amazon.awssdk.services.s3.model.CommonPrefix; -import software.amazon.awssdk.services.s3.model.S3Object; import java.io.Closeable; import java.io.IOException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java index fdc60a638a77e..4166bcea90d37 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java @@ -22,11 +22,10 @@ import java.nio.file.AccessDeniedException; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.services.s3.model.S3Error; import software.amazon.awssdk.services.s3.model.S3Exception; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -61,7 +60,9 @@ public MultiObjectDeleteException(List errors) { this.errors = errors; } - public List errors() { return errors; } + public List errors() { + return errors; + } /** * A {@code MultiObjectDeleteException} is raised if one or more diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java index c471e052d4f08..296ec18dcf18d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultipartUtils.java @@ -23,13 +23,13 @@ import java.util.NoSuchElementException; import javax.annotation.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; import software.amazon.awssdk.services.s3.model.MultipartUpload; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.api.RequestFactory; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java index 8ef7bc4b36c9c..1c0fd76c6b107 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java @@ 
-18,12 +18,11 @@ package org.apache.hadoop.fs.s3a; -import org.slf4j.Logger; - import software.amazon.awssdk.transfer.s3.ObjectTransfer; import software.amazon.awssdk.transfer.s3.progress.TransferListener; - import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; + /** * Listener to progress from AWS regarding transfers. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index 4b450b7ee9ff0..39d7ed72d7c8d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -31,6 +31,13 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import com.amazonaws.event.ProgressEvent; import com.amazonaws.event.ProgressEventType; import com.amazonaws.event.ProgressListener; @@ -45,14 +52,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.model.CompletedPart; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; -import software.amazon.awssdk.services.s3.model.UploadPartResponse; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Abortable; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java index de9d1ddca1666..e82b55f22457a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java @@ -101,7 +101,7 @@ static BlockFactory createFactory(S3AFileSystem owner, * It can be one of a file or an input stream. * When closed, any stream is closed. Any source file is untouched. */ -public static final class BlockUploadData implements Closeable { + public static final class BlockUploadData implements Closeable { private final File file; private final InputStream uploadStream; @@ -109,7 +109,7 @@ public static final class BlockUploadData implements Closeable { * File constructor; input stream will be null. * @param file file to upload */ - public BlockUploadData(File file) { + public BlockUploadData(File file) { Preconditions.checkArgument(file.exists(), "No file: " + file); this.file = file; this.uploadStream = null; @@ -119,7 +119,7 @@ public BlockUploadData(File file) { * Stream constructor, file field will be null. 
* @param uploadStream stream to upload */ - public BlockUploadData(InputStream uploadStream) { + public BlockUploadData(InputStream uploadStream) { Preconditions.checkNotNull(uploadStream, "rawUploadStream"); this.uploadStream = uploadStream; this.file = null; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index d5fd4f154f68c..324e03ff8a465 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -51,14 +51,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; -import com.amazonaws.services.s3.Headers; - -import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.core.ResponseInputStream; import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; @@ -102,6 +97,11 @@ import software.amazon.awssdk.transfer.s3.S3TransferManager; import software.amazon.awssdk.transfer.s3.UploadFileRequest; +import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -121,6 +121,8 @@ import org.apache.hadoop.fs.s3a.auth.SignerManager; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider; +import org.apache.hadoop.fs.s3a.impl.AWSCannedACL; +import org.apache.hadoop.fs.s3a.impl.AWSHeaders; import org.apache.hadoop.fs.s3a.impl.BulkDeleteRetryHandler; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; @@ -837,29 +839,25 @@ protected static S3AStorageStatistics createStorageStatistics( /** * Verify that the bucket exists. - * TODO: Review: this used to call doesBucketExist in v1, which does not check permissions, not even read access. * Retry policy: retrying, translated. * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ + // TODO: Review: this used to call doesBucketExist in v1, which does not check permissions, + // not even read access. 
 @Retries.RetryTranslated
-  protected void verifyBucketExists()
-      throws UnknownStoreException, IOException {
+  protected void verifyBucketExists() throws UnknownStoreException, IOException {
     if (!invoker.retry("doesBucketExist", bucket, true,
-        trackDurationOfOperation(getDurationTrackerFactory(),
-            STORE_EXISTS_PROBE.getSymbol(),
+        trackDurationOfOperation(getDurationTrackerFactory(), STORE_EXISTS_PROBE.getSymbol(),
             () -> {
-              try {
-                s3Client.headBucket(HeadBucketRequest.builder()
-                    .bucket(bucket)
-                    .build());
-                return true;
-              } catch (NoSuchBucketException e) {
-                return false;
-              }
-            }))) {
-      throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does "
-          + "not exist");
+              try {
+                s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
+                return true;
+              } catch (NoSuchBucketException e) {
+                return false;
+              }
+            }))) {
+      throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist");
     }
   }
@@ -1106,13 +1104,13 @@ protected RequestFactory createRequestFactory() {
         .toUpperCase(Locale.US);
     StorageClass storageClass = null;
     if (!storageClassConf.isEmpty()) {
-       storageClass = StorageClass.fromValue(storageClassConf);
+      storageClass = StorageClass.fromValue(storageClassConf);

-       if (storageClass.equals(StorageClass.UNKNOWN_TO_SDK_VERSION)) {
-         LOG.warn("Unknown storage class property {}: {}; falling back to default storage class",
-             STORAGE_CLASS, storageClassConf);
-         storageClass = null;
-       }
+      if (storageClass.equals(StorageClass.UNKNOWN_TO_SDK_VERSION)) {
+        LOG.warn("Unknown storage class property {}: {}; falling back to default storage class",
+            STORAGE_CLASS, storageClassConf);
+        storageClass = null;
+      }

     } else {
       LOG.debug("Unset storage class property {}; falling back to default storage class",
@@ -1184,10 +1182,14 @@ private void initTransferManager() {
     // TODO: move to client factory?
     transferManager = S3TransferManager.builder()
         .s3ClientConfiguration(clientConfiguration ->
-            // TODO: other configuration options?
+            // TODO: Temporarily using EU_WEST_1 as the region, ultimately this can maybe be moved to
+            //  the DefaultS3ClientFactory and use the region resolution logic there. Wait till we
+            //  finalise region logic before making any changes here. Also add other
+            //  configuration options?
             clientConfiguration
                 .minimumPartSizeInBytes(partSize)
-                .credentialsProvider(credentials))
+                .credentialsProvider(credentials)
+                .region(Region.EU_WEST_1))
         .transferConfiguration(transferConfiguration ->
             transferConfiguration.executor(unboundedThreadPool)) // TODO: double-check
         .build();
@@ -1713,7 +1715,7 @@ private final class WriteOperationHelperCallbacksImpl
     public CompletableFuture<Void> selectObjectContent(
         SelectObjectContentRequest request,
         SelectObjectContentResponseHandler responseHandler) {
-        return s3AsyncClient.selectObjectContent(request, responseHandler);
+      return s3AsyncClient.selectObjectContent(request, responseHandler);
     }

     @Override
@@ -2935,9 +2937,9 @@ private DeleteObjectsResponse deleteObjects(DeleteObjectsRequest deleteRequest)
         // duration is tracked in the bulk delete counters
         trackDurationOfOperation(getDurationTrackerFactory(),
             OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> {
-          incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount);
-          return s3Client.deleteObjects(deleteRequest);
-        }));
+              incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount);
+              return s3Client.deleteObjects(deleteRequest);
+            }));
     if (!response.errors().isEmpty()) {
       // one or more of the keys could not be deleted.
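The errors() check above reflects how partial bulk-delete failures surface in v2: the response carries a per-key S3Error list, and the filesystem raises its own MultiObjectDeleteException from that list rather than catching one thrown by the client as in v1. A minimal sketch of the calling pattern, assuming an already-configured S3Client (class and method names are illustrative, and the key list can be built as in the ObjectIdentifier mapping shown earlier):

    import java.util.List;

    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.model.Delete;
    import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
    import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse;
    import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
    import software.amazon.awssdk.services.s3.model.S3Error;

    final class BulkDeleteSketch {
      // Issue one bulk delete; an empty return list means every key was deleted,
      // a non-empty list means a partial failure the caller must handle.
      static List<S3Error> deleteAll(S3Client s3, String bucket,
          List<ObjectIdentifier> ids) {
        DeleteObjectsResponse response = s3.deleteObjects(
            DeleteObjectsRequest.builder()
                .bucket(bucket)
                .delete(Delete.builder().objects(ids).build())
                .build());
        return response.errors();
      }
    }
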
@@ -3778,7 +3780,7 @@ S3AFileStatus s3GetFileStatus(final Path path, long contentLength = meta.contentLength(); // check if CSE is enabled, then strip padded length. if (isCSEEnabled && - meta.metadata().get(Headers.CRYPTO_CEK_ALGORITHM) != null + meta.metadata().get(AWSHeaders.CRYPTO_CEK_ALGORITHM) != null && contentLength >= CSE_PADDING_LENGTH) { contentLength -= CSE_PADDING_LENGTH; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index d3fa0a0e84799..2ed9083efcddd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -32,6 +32,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.IntFunction; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,9 +61,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.functional.CallableRaisingIOE; -import software.amazon.awssdk.core.ResponseInputStream; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; import static java.util.Objects.requireNonNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index d4208b082749b..2f21be9e6d107 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -30,6 +30,8 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import software.amazon.awssdk.core.exception.SdkException; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,9 +43,7 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.net.ConnectTimeoutException; -import org.apache.hadoop.util.Preconditions; -import software.amazon.awssdk.core.exception.SdkException; import static org.apache.hadoop.io.retry.RetryPolicies.*; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 380a707efa262..07cbfd268278f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -21,6 +21,14 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.Protocol; import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.retry.RetryUtils; +import software.amazon.awssdk.services.s3.model.S3Exception; +import 
software.amazon.awssdk.services.s3.model.S3Object; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; @@ -46,19 +54,9 @@ import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.Lists; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.core.exception.AbortedException; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.retry.RetryUtils; -import software.amazon.awssdk.services.s3.model.S3Exception; -import software.amazon.awssdk.services.s3.model.S3Object; - import javax.annotation.Nullable; import java.io.Closeable; import java.io.EOFException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 97a9bebdd226d..1f1344677dbf4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -25,6 +25,10 @@ import java.util.List; import java.util.Map; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; import com.amazonaws.monitoring.MonitoringListener; import com.amazonaws.services.s3.AmazonS3; @@ -32,10 +36,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import software.amazon.awssdk.services.s3.S3AsyncClient; -import software.amazon.awssdk.services.s3.S3Client; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index 98edcb9d7d4ba..40eec3bed8213 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -25,9 +25,6 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; @@ -40,6 +37,9 @@ import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -711,7 +711,8 @@ public interface WriteOperationHelperCallbacks { * @param request selectObjectContent 
request * @return selectObjectContentResult */ - CompletableFuture selectObjectContent(SelectObjectContentRequest request, SelectObjectContentResponseHandler t); + CompletableFuture selectObjectContent(SelectObjectContentRequest request, + SelectObjectContentResponseHandler t); /** * Initiates a complete multi-part upload request. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java index db82267044404..242a29fe21396 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -22,7 +22,6 @@ import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSSessionCredentials; import com.amazonaws.auth.AnonymousAWSCredentials; - import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 4806543815cdb..d05a7dc878a38 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -21,11 +21,6 @@ import javax.annotation.Nullable; import java.util.List; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; -import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; -import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; - import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompletedPart; @@ -46,6 +41,11 @@ import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; +import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; +import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; + /** * Factory for S3 objects. 
* diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java index 4389c49d866bb..352acd56092c8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditIntegration.java @@ -23,6 +23,7 @@ import java.lang.reflect.InvocationTargetException; import java.nio.file.AccessDeniedException; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,7 +34,6 @@ import org.apache.hadoop.fs.s3a.audit.impl.NoopAuditManagerS3A; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; -import software.amazon.awssdk.core.interceptor.ExecutionAttributes; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java index e83216b3a75bc..bb7f94cfc206d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AuditManagerS3A.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.List; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; @@ -29,8 +32,6 @@ import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.service.Service; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import software.amazon.awssdk.transfer.s3.progress.TransferListener; /** * Interface for Audit Managers auditing operations through the diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java index 0a05a852a0fd7..a45d19dfa000b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java @@ -25,6 +25,14 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.SdkResponse; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.http.SdkHttpResponse; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,15 +60,6 @@ import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.functional.FutureIO; -import software.amazon.awssdk.core.SdkRequest; -import software.amazon.awssdk.core.SdkResponse; -import software.amazon.awssdk.core.interceptor.Context; -import 
software.amazon.awssdk.core.interceptor.ExecutionAttributes; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import software.amazon.awssdk.http.SdkHttpRequest; -import software.amazon.awssdk.http.SdkHttpResponse; -import software.amazon.awssdk.transfer.s3.progress.TransferListener; - import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE; import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION; @@ -411,7 +410,7 @@ public List createExecutionInterceptors() } // TODO: should we remove this and use Global/Service interceptors, see: - // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html + // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html final Class[] interceptors = getConfig().getClasses(AUDIT_EXECUTION_INTERCEPTORS); if (interceptors != null) { for (Class handler : interceptors) { @@ -540,15 +539,16 @@ private AuditSpanS3A extractAndActivateSpanFromRequest( /** * Forward to active span. - * @param request request + * @param context execution context + * @param executionAttributes the execution attributes * {@inheritDoc} - */@Override + */ + @Override public void onExecutionFailure(Context.FailedExecution context, ExecutionAttributes executionAttributes) { try { - extractAndActivateSpanFromRequest(context.request(), - executionAttributes) - .onExecutionFailure(context, executionAttributes); + extractAndActivateSpanFromRequest(context.request(), executionAttributes).onExecutionFailure( + context, executionAttributes); } catch (AuditFailureException e) { ioStatisticsStore.incrementCounter(AUDIT_FAILURE.getSymbol()); throw e; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java index 26d2db55c5641..e58c906460daa 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/NoopAuditManagerS3A.java @@ -24,6 +24,9 @@ import java.util.List; import java.util.UUID; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -35,8 +38,6 @@ import org.apache.hadoop.fs.s3a.audit.OperationAuditorOptions; import org.apache.hadoop.service.CompositeService; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import software.amazon.awssdk.transfer.s3.progress.TransferListener; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java index 365885cc70ab5..c88a0128f8ec5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AbstractSessionCredentialsProvider.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; +import 
software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -30,8 +32,6 @@ import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.core.exception.SdkException; /** * Base class for session credential support. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index eb32ed8afc8a1..e493154d85535 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -26,10 +26,6 @@ import java.util.Locale; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; @@ -38,6 +34,10 @@ import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; import software.amazon.awssdk.services.sts.model.StsException; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.util.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java index e91f8b0824020..a84318891e9fa 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/MarshalledCredentialBinding.java @@ -24,10 +24,6 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.classification.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; @@ -35,6 +31,10 @@ import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.sts.StsClient; import software.amazon.awssdk.services.sts.model.Credentials; +import org.apache.hadoop.classification.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.Invoker; @@ -188,6 +188,7 @@ public static AwsCredentials toAWSCredentials( * @param stsRegion region; use if the endpoint isn't the AWS default. * @param duration duration of the credentials in seconds. Minimum value: 900. * @param invoker invoker to use for retrying the call. 
+ * @param bucket bucket name. * @return the credentials * @throws IOException on a failure of the request */ @@ -205,7 +206,8 @@ public static MarshalledCredentials requestSessionCredentials( STSClientFactory.builder(parentCredentials, configuration, stsEndpoint.isEmpty() ? null : stsEndpoint, - stsRegion, bucket) + stsRegion, + bucket) .build(); try (STSClientFactory.STSClient stsClient = STSClientFactory.createClientConnection( tokenService, invoker)) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java index ebd8ad9fddc98..62f6ea845e6bf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java @@ -24,11 +24,6 @@ import java.net.URISyntaxException; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.fs.s3a.AWSClientConfig; -import org.apache.hadoop.util.Preconditions; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import software.amazon.awssdk.core.retry.RetryPolicy; @@ -41,6 +36,12 @@ import software.amazon.awssdk.services.sts.model.Credentials; import software.amazon.awssdk.services.sts.model.GetSessionTokenRequest; import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; +import org.apache.hadoop.util.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -118,11 +119,8 @@ public static StsClientBuilder builder( * @param stsRegion the region, e.g "us-west-1". Must be set if endpoint is. * @return the builder to call {@code build()} */ - public static StsClientBuilder builder( - final AwsCredentialsProvider credentials, - final Configuration conf, - final String stsEndpoint, - final String stsRegion, + public static StsClientBuilder builder(final AwsCredentialsProvider credentials, + final Configuration conf, final String stsEndpoint, final String stsRegion, final String bucket) throws IOException { final StsClientBuilder stsClientBuilder = StsClient.builder(); @@ -136,31 +134,25 @@ public static StsClientBuilder builder( final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); - final ProxyConfiguration proxyConfig = - AWSClientConfig.createProxyConfiguration(conf, bucket); + final ProxyConfiguration proxyConfig = AWSClientConfig.createProxyConfiguration(conf, bucket); clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); httpClientBuilder.proxyConfiguration(proxyConfig); - stsClientBuilder - .httpClientBuilder(httpClientBuilder) + stsClientBuilder.httpClientBuilder(httpClientBuilder) .overrideConfiguration(clientOverrideConfigBuilder.build()) - .credentialsProvider(credentials); + .credentialsProvider(credentials); // TODO: SIGNERS NOT ADDED YET. 
boolean destIsStandardEndpoint = STS_STANDARD.equals(stsEndpoint); if (isNotEmpty(stsEndpoint) && !destIsStandardEndpoint) { - Preconditions.checkArgument( - isNotEmpty(stsRegion), - "STS endpoint is set to %s but no signing region was provided", - stsEndpoint); + Preconditions.checkArgument(isNotEmpty(stsRegion), + "STS endpoint is set to %s but no signing region was provided", stsEndpoint); LOG.debug("STS Endpoint={}; region='{}'", stsEndpoint, stsRegion); - stsClientBuilder.endpointOverride(getSTSEndpoint(stsEndpoint)) - .region(Region.of(stsRegion)); + stsClientBuilder.endpointOverride(getSTSEndpoint(stsEndpoint)).region(Region.of(stsRegion)); } else { Preconditions.checkArgument(isEmpty(stsRegion), - "STS signing region set set to %s but no STS endpoint specified", - stsRegion); + "STS signing region set to %s but no STS endpoint specified", stsRegion); } return stsClientBuilder; } @@ -174,7 +166,7 @@ public static StsClientBuilder builder( private static URI getSTSEndpoint(String endpoint) { try { // TODO: The URI builder is currently imported via a shaded dependency. This is due to TM - // preview dependency causing some issues. + // preview dependency causing some issues. return new URIBuilder().setScheme("https").setHost(endpoint).build(); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java index 889e1e2c4af79..bcd358e2d1672 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java @@ -42,7 +42,7 @@ public static Optional getSSECustomerKey(final EncryptionSecrets secrets if (secrets.hasEncryptionKey() && secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_C) { return Optional.of(secrets.getEncryptionKey()); } else { - return Optional.empty(); + return Optional.empty(); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java index cb0cb64233aa1..e83462b92a086 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/RoleTokenBinding.java @@ -23,11 +23,11 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; +import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java index 73123a0d71ee4..0a73411156b6d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java @@ -42,7 +42,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.DelegationTokenIssuer; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.util.DurationInfo; import static org.apache.hadoop.fs.s3a.Statistic.DELEGATION_TOKENS_ISSUED; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 434ec5b24670a..440f5305af2c2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -26,12 +26,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.hadoop.classification.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import software.amazon.awssdk.services.sts.StsClient; +import org.apache.hadoop.classification.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java index 54d3bc2e24e72..09664a6dbdf63 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java @@ -27,10 +27,10 @@ import java.util.List; import java.util.UUID; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.StringUtils; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java index 8801c8bdce797..e4541ba4da370 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/files/SinglePendingCommit.java @@ -31,9 +31,9 @@ import java.util.List; import java.util.Map; +import software.amazon.awssdk.services.s3.model.CompletedPart; import com.fasterxml.jackson.annotation.JsonProperty; -import software.amazon.awssdk.services.s3.model.CompletedPart; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java index 0dc0db24bafcd..d1943fa47773f 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/impl/CommitOperations.java @@ -34,14 +34,14 @@ import javax.annotation.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.services.s3.model.CompletedPart; import software.amazon.awssdk.services.s3.model.MultipartUpload; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.FileSystem; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java index 135adf0de39a2..b2e703e1b088d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicCommitTracker.java @@ -25,11 +25,11 @@ import java.util.List; import java.util.Map; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.CompletedPart; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java similarity index 92% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java index ac1e5f412b86d..2f52f3ae9a832 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCannedACL.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3a; +package org.apache.hadoop.fs.s3a.impl; /** * Enum to map AWS SDK V1 Acl values to SDK V2. @@ -36,5 +36,7 @@ public enum AWSCannedACL { this.value = value; } - public String toString() { return this.value; } + public String toString() { + return this.value; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java similarity index 98% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 00f5a9fbf5d4d..55fb1132bb0e7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.fs.s3a; +package org.apache.hadoop.fs.s3a.impl; import java.io.IOException; import java.net.URI; @@ -32,9 +32,12 @@ import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.http.apache.ProxyConfiguration; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +// TODO: Update to use the non-shaded dependency. There is an issue with the preview version of TM +// which is preventing this; it should be resolved with the TM release. import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.util.VersionInfo; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java new file mode 100644 index 0000000000000..3cb714588bd39 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Common S3 HTTP header values used throughout the Amazon Web Services S3 Java client. + */ +public interface AWSHeaders { + + /* + * Standard HTTP Headers + */ + + String CACHE_CONTROL = "Cache-Control"; + String CONTENT_DISPOSITION = "Content-Disposition"; + String CONTENT_ENCODING = "Content-Encoding"; + String CONTENT_LENGTH = "Content-Length"; + String CONTENT_RANGE = "Content-Range"; + String CONTENT_MD5 = "Content-MD5"; + String CONTENT_TYPE = "Content-Type"; + String CONTENT_LANGUAGE = "Content-Language"; + String DATE = "Date"; + String ETAG = "ETag"; + String LAST_MODIFIED = "Last-Modified"; + + /* + * Amazon HTTP Headers used by S3A + */ + + /** S3's version ID header */ + String S3_VERSION_ID = "x-amz-version-id"; + + /** Header describing what class of storage a user wants */ + String STORAGE_CLASS = "x-amz-storage-class"; + + /** Header describing what archive tier the object is in, if any */ + String ARCHIVE_STATUS = "x-amz-archive-status"; + + /** Header for optional server-side encryption algorithm */ + String SERVER_SIDE_ENCRYPTION = "x-amz-server-side-encryption"; + + /** Range header for the get object request */ + String RANGE = "Range"; + + /** + * Encrypted symmetric key header that is used in the Encryption Only (EO) envelope + * encryption mechanism. 
+ */ + @Deprecated + String CRYPTO_KEY = "x-amz-key"; + + /** JSON-encoded description of encryption materials used during encryption */ + String MATERIALS_DESCRIPTION = "x-amz-matdesc"; + + /** Header for the optional restore information of an object */ + String RESTORE = "x-amz-restore"; + + /** + * Key wrapping algorithm such as "AESWrap" and "RSA/ECB/OAEPWithSHA-256AndMGF1Padding". + */ + String CRYPTO_KEYWRAP_ALGORITHM = "x-amz-wrap-alg"; + /** + * Content encryption algorithm, such as "AES/GCM/NoPadding". + */ + String CRYPTO_CEK_ALGORITHM = "x-amz-cek-alg"; + + /** + * Headers in request indicating that the requester must be charged for data + * transfer. + */ + String REQUESTER_PAYS_HEADER = "x-amz-request-payer"; + + /** Header for the replication status of an Amazon S3 Object.*/ + String OBJECT_REPLICATION_STATUS = "x-amz-replication-status"; + + String OBJECT_LOCK_MODE = "x-amz-object-lock-mode"; + + String OBJECT_LOCK_RETAIN_UNTIL_DATE = "x-amz-object-lock-retain-until-date"; + + String OBJECT_LOCK_LEGAL_HOLD_STATUS = "x-amz-object-lock-legal-hold"; + +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java index f076a4b701e00..5808607762ea6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteRetryHandler.java @@ -20,6 +20,9 @@ import java.util.List; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,9 +31,6 @@ import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; -import software.amazon.awssdk.core.exception.SdkClientException; -import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException; import static org.apache.hadoop.fs.s3a.Statistic.IGNORED_ERRORS; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index f9d673e657966..25f7c4e9c1a74 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -20,16 +20,15 @@ import java.util.Locale; -import org.apache.hadoop.classification.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.services.s3.model.CopyObjectRequest; import software.amazon.awssdk.services.s3.model.CopyObjectResponse; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.GetObjectResponse; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import org.apache.hadoop.classification.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.commons.lang3.tuple.ImmutablePair; import 
org.apache.hadoop.classification.InterfaceAudience; @@ -435,7 +434,8 @@ private String logIfNull(String versionId, String uri) { CHANGE_DETECT_MODE + " set to " + Source.VersionId + " but no versionId available while reading {}. " + "Ensure your bucket has object versioning enabled. " - + "You may see inconsistent reads.", uri); + + "You may see inconsistent reads.", + uri); } return versionId; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index 6020f979fa3ee..e36842c39b731 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -18,10 +18,6 @@ package org.apache.hadoop.fs.s3a.impl; -import org.apache.hadoop.classification.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.model.CopyObjectRequest; @@ -30,6 +26,9 @@ import software.amazon.awssdk.services.s3.model.GetObjectResponse; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import org.apache.hadoop.classification.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.classification.InterfaceAudience; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java index 8f919897372ad..314d7cb82d1dd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java @@ -24,6 +24,7 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; @@ -39,7 +40,6 @@ import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.util.DurationInfo; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; import static org.apache.hadoop.util.Preconditions.checkArgument; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java index 275ad40c08df1..4926ff13f8d0c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java @@ -29,11 +29,10 @@ import java.util.Optional; import java.util.TreeMap; -import com.amazonaws.services.s3.Headers; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.model.CopyObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; @@ -77,50 +76,50 @@ public class HeaderProcessing extends AbstractStoreOperation { * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CACHE_CONTROL = - XA_HEADER_PREFIX + Headers.CACHE_CONTROL; + XA_HEADER_PREFIX + AWSHeaders.CACHE_CONTROL; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_DISPOSITION = - XA_HEADER_PREFIX + Headers.CONTENT_DISPOSITION; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_DISPOSITION; /** * Content encoding; can be configured: {@value}. */ public static final String XA_CONTENT_ENCODING = - XA_HEADER_PREFIX + Headers.CONTENT_ENCODING; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_ENCODING; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_LANGUAGE = - XA_HEADER_PREFIX + Headers.CONTENT_LANGUAGE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_LANGUAGE; /** * Length XAttr: {@value}. */ public static final String XA_CONTENT_LENGTH = - XA_HEADER_PREFIX + Headers.CONTENT_LENGTH; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_LENGTH; /** * Standard HTTP header found on some S3 objects: {@value}. */ public static final String XA_CONTENT_MD5 = - XA_HEADER_PREFIX + Headers.CONTENT_MD5; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_MD5; /** * Content range: {@value}. * This is returned on GET requests with ranges. */ public static final String XA_CONTENT_RANGE = - XA_HEADER_PREFIX + Headers.CONTENT_RANGE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_RANGE; /** * Content type: may be set when uploading. * {@value}. */ public static final String XA_CONTENT_TYPE = - XA_HEADER_PREFIX + Headers.CONTENT_TYPE; + XA_HEADER_PREFIX + AWSHeaders.CONTENT_TYPE; /** * Etag Header {@value}. @@ -128,14 +127,14 @@ public class HeaderProcessing extends AbstractStoreOperation { * it can be retrieved via {@code getFileChecksum(path)} if * the S3A connector is enabled. */ - public static final String XA_ETAG = XA_HEADER_PREFIX + Headers.ETAG; + public static final String XA_ETAG = XA_HEADER_PREFIX + AWSHeaders.ETAG; /** * last modified XAttr: {@value}. */ public static final String XA_LAST_MODIFIED = - XA_HEADER_PREFIX + Headers.LAST_MODIFIED; + XA_HEADER_PREFIX + AWSHeaders.LAST_MODIFIED; /* AWS Specific Headers. May not be found on other S3 endpoints. */ @@ -146,50 +145,50 @@ public class HeaderProcessing extends AbstractStoreOperation { * Value {@value}. */ public static final String XA_ARCHIVE_STATUS = - XA_HEADER_PREFIX + Headers.ARCHIVE_STATUS; + XA_HEADER_PREFIX + AWSHeaders.ARCHIVE_STATUS; /** * Object legal hold status. {@value}. */ public static final String XA_OBJECT_LOCK_LEGAL_HOLD_STATUS = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_LEGAL_HOLD_STATUS; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_LEGAL_HOLD_STATUS; /** * Object lock mode. {@value}. */ public static final String XA_OBJECT_LOCK_MODE = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_MODE; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_MODE; /** * ISO8601 expiry date of object lock hold. {@value}. */ public static final String XA_OBJECT_LOCK_RETAIN_UNTIL_DATE = - XA_HEADER_PREFIX + Headers.OBJECT_LOCK_RETAIN_UNTIL_DATE; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_LOCK_RETAIN_UNTIL_DATE; /** * Replication status for cross-region replicated objects. {@value}. 
*/ public static final String XA_OBJECT_REPLICATION_STATUS = - XA_HEADER_PREFIX + Headers.OBJECT_REPLICATION_STATUS; + XA_HEADER_PREFIX + AWSHeaders.OBJECT_REPLICATION_STATUS; /** * Version ID; empty for non-versioned buckets/data. {@value}. */ public static final String XA_S3_VERSION_ID = - XA_HEADER_PREFIX + Headers.S3_VERSION_ID; + XA_HEADER_PREFIX + AWSHeaders.S3_VERSION_ID; /** * The server-side encryption algorithm to use * with AWS-managed keys: {@value}. */ public static final String XA_SERVER_SIDE_ENCRYPTION = - XA_HEADER_PREFIX + Headers.SERVER_SIDE_ENCRYPTION; + XA_HEADER_PREFIX + AWSHeaders.SERVER_SIDE_ENCRYPTION; /** * Storage Class XAttr: {@value}. */ public static final String XA_STORAGE_CLASS = - XA_HEADER_PREFIX + Headers.STORAGE_CLASS; + XA_HEADER_PREFIX + AWSHeaders.STORAGE_CLASS; /** * HTTP Referrer for logs: {@value}. @@ -328,7 +327,7 @@ private Map retrieveHeaders( maybeSetHeader(headers, XA_ETAG, md.eTag()); maybeSetHeader(headers, XA_LAST_MODIFIED, - Date.from(md.lastModified())); + Date.from(md.lastModified())); // AWS custom headers maybeSetHeader(headers, XA_ARCHIVE_STATUS, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java index 62ed2ba614514..4bb15f74965a9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java @@ -25,6 +25,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicLong; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,9 +43,6 @@ import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.OperationDuration; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; - import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 1e01253bbf2b4..bbf0c384456ee 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -24,10 +24,6 @@ import java.util.Map; import javax.annotation.Nullable; -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.core.SdkRequest; import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; @@ -52,6 +48,9 @@ import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.utils.Md5Utils; +import org.apache.hadoop.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.Retries; @@ -204,7 +203,7 @@ public StorageClass 
getStorageClass() { /** * Sets server side encryption parameters to the part upload * request when encryption is enabled. - * @param request upload part request + * @param builder upload part request builder */ protected void uploadPartEncryptionParameters( UploadPartRequest.Builder builder) { @@ -261,8 +260,7 @@ public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, */ protected void copyEncryptionParameters(CopyObjectRequest.Builder copyObjectRequestBuilder) { - final S3AEncryptionMethods algorithm - = getServerSideEncryptionAlgorithm(); + final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); if (S3AEncryptionMethods.SSE_S3 == algorithm) { copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); @@ -272,14 +270,16 @@ protected void copyEncryptionParameters(CopyObjectRequest.Builder copyObjectRequ EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey)); } else if (S3AEncryptionMethods.SSE_C == algorithm) { - EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { - copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.name()) - .copySourceSSECustomerKey(base64customerKey).copySourceSSECustomerKeyMD5( - Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))) - .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) - .sseCustomerKey(base64customerKey) - .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); - }); + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) + .ifPresent(base64customerKey -> { + copyObjectRequestBuilder.copySourceSSECustomerAlgorithm( + ServerSideEncryption.AES256.name()).copySourceSSECustomerKey(base64customerKey) + .copySourceSSECustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))) + .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey).sseCustomerKeyMD5( + Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); } } /** @@ -394,9 +394,9 @@ public AbortMultipartUploadRequest.Builder newAbortMultipartUploadRequestBuilder return prepareRequest(requestBuilder); } - private void multipartUploadEncryptionParameters(CreateMultipartUploadRequest.Builder mpuRequestBuilder) { - final S3AEncryptionMethods algorithm - = getServerSideEncryptionAlgorithm(); + private void multipartUploadEncryptionParameters( + CreateMultipartUploadRequest.Builder mpuRequestBuilder) { + final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); if (S3AEncryptionMethods.SSE_S3 == algorithm) { mpuRequestBuilder.serverSideEncryption(algorithm.getMethod()); @@ -523,18 +523,13 @@ public UploadPartRequest.Builder newUploadPartRequestBuilder( @Override public SelectObjectContentRequest.Builder newSelectRequestBuilder(String key) { SelectObjectContentRequest.Builder requestBuilder = - SelectObjectContentRequest.builder() - .bucket(bucket) - .key(key); + SelectObjectContentRequest.builder().bucket(bucket).key(key); - EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) - .ifPresent(base64customerKey -> { - requestBuilder - .sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) - .sseCustomerKey(base64customerKey) - .sseCustomerKeyMD5(Md5Utils.md5AsBase64( - Base64.getDecoder().decode(base64customerKey))); - }); + EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { + 
requestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) + .sseCustomerKey(base64customerKey) + .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); + }); return prepareRequest(requestBuilder); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java index 206d74e549d88..49c2fb8947dce 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/SDKStreamDrainer.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.util.concurrent.atomic.AtomicBoolean; +import software.amazon.awssdk.http.Abortable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,7 +29,6 @@ import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.util.functional.CallableRaisingIOE; -import software.amazon.awssdk.http.Abortable; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java index 65b5a4235133b..ec6e3700226e0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/prefetch/S3ARemoteObject.java @@ -21,6 +21,9 @@ import java.io.IOException; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,9 +38,6 @@ import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.fs.statistics.DurationTracker; -import software.amazon.awssdk.core.ResponseInputStream; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; /** * Encapsulates low level interactions with S3 object on AWS. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 63913afac79b1..ec68168bd0ffd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -33,11 +33,11 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import software.amazon.awssdk.services.s3.model.MultipartUpload; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.MultipartUpload; - import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java index 6ff195609cb10..c09acc8ba2220 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java @@ -40,15 +40,15 @@ */ public final class BlockingEnumeration implements Enumeration { private static final class Signal { - public final T element; - public final Throwable error; + private final T element; + private final Throwable error; - public Signal(T element) { + Signal(T element) { this.element = element; this.error = null; } - public Signal(Throwable error) { + Signal(Throwable error) { this.element = null; this.error = error; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java index 95cad54338344..c3b8abbc2ea88 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java @@ -21,6 +21,13 @@ import java.io.IOException; import java.util.Locale; +import software.amazon.awssdk.services.s3.model.CSVInput; +import software.amazon.awssdk.services.s3.model.CSVOutput; +import software.amazon.awssdk.services.s3.model.ExpressionType; +import software.amazon.awssdk.services.s3.model.InputSerialization; +import software.amazon.awssdk.services.s3.model.OutputSerialization; +import software.amazon.awssdk.services.s3.model.QuoteFields; +import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,14 +42,6 @@ import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.WriteOperationHelper; -import software.amazon.awssdk.services.s3.model.CSVInput; -import software.amazon.awssdk.services.s3.model.CSVOutput; -import software.amazon.awssdk.services.s3.model.ExpressionType; -import software.amazon.awssdk.services.s3.model.InputSerialization; -import software.amazon.awssdk.services.s3.model.OutputSerialization; -import software.amazon.awssdk.services.s3.model.QuoteFields; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; - import static org.apache.hadoop.util.Preconditions.checkNotNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import 
static org.apache.hadoop.fs.s3a.select.SelectConstants.*; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java index a2f5f28dc4c87..3586d83a0a434 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java @@ -23,8 +23,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.http.AbortableInputStream; import org.apache.hadoop.util.Preconditions; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,8 +41,6 @@ import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; import org.apache.hadoop.io.IOUtils; -import software.amazon.awssdk.core.exception.AbortedException; -import software.amazon.awssdk.http.AbortableInputStream; import static org.apache.hadoop.util.Preconditions.checkNotNull; import static org.apache.commons.lang3.StringUtils.isNotEmpty; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java index c08793defaaff..8233e67eea0a5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java @@ -39,7 +39,10 @@ * Helper for SelectObjectContent queries against an S3 Bucket. */ public final class SelectObjectContentHelper { - + + private SelectObjectContentHelper() { + } + /** * Execute an S3 Select operation. 
* @param writeOperationHelperCallbacks helper callbacks @@ -90,8 +93,8 @@ public CompletableFuture eventPublisher( } @Override - public void responseReceived(SelectObjectContentResponse response) { - this.response = response; + public void responseReceived(SelectObjectContentResponse selectObjectContentResponse) { + this.response = selectObjectContentResponse; } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java index 188bb83ddadab..711b582300200 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/statistics/impl/AwsStatisticsCollector.java @@ -100,32 +100,32 @@ public void publish(MetricCollection metricCollection) { recurseThroughChildren(metricCollection) .collect(Collectors.toList()) .forEach(m -> { - counter(m, CoreMetric.RETRY_COUNT, retries -> { - collector.updateAwsRetryCount(retries); - collector.updateAwsRequestCount(retries + 1); - }); + counter(m, CoreMetric.RETRY_COUNT, retries -> { + collector.updateAwsRetryCount(retries); + collector.updateAwsRequestCount(retries + 1); + }); - counter(m, HttpMetric.HTTP_STATUS_CODE, statusCode -> { - if (statusCode == HttpStatusCode.THROTTLING) { - throttling[0] += 1; - } - }); + counter(m, HttpMetric.HTTP_STATUS_CODE, statusCode -> { + if (statusCode == HttpStatusCode.THROTTLING) { + throttling[0] += 1; + } + }); - timing(m, CoreMetric.API_CALL_DURATION, - collector::noteAwsClientExecuteTime); + timing(m, CoreMetric.API_CALL_DURATION, + collector::noteAwsClientExecuteTime); - timing(m, CoreMetric.SERVICE_CALL_DURATION, - collector::noteAwsRequestTime); + timing(m, CoreMetric.SERVICE_CALL_DURATION, + collector::noteAwsRequestTime); - timing(m, CoreMetric.MARSHALLING_DURATION, - collector::noteRequestMarshallTime); + timing(m, CoreMetric.MARSHALLING_DURATION, + collector::noteRequestMarshallTime); - timing(m, CoreMetric.SIGNING_DURATION, - collector::noteRequestSigningTime); + timing(m, CoreMetric.SIGNING_DURATION, + collector::noteRequestSigningTime); - timing(m, CoreMetric.UNMARSHALLING_DURATION, - collector::noteResponseProcessingTime); - }); + timing(m, CoreMetric.UNMARSHALLING_DURATION, + collector::noteResponseProcessingTime); + }); collector.updateAwsThrottleExceptionsCount(throttling[0]); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index 58078d1c6e04c..cd4c8e585ef68 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -32,8 +32,9 @@ import java.util.Map; import java.util.stream.Collectors; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,12 +56,11 @@ import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; +import 
org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.shell.CommandFormat; import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.ExitUtil; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java index 869afddd5582a..09a22ddfe018b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.List; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -28,8 +31,6 @@ import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; /** * Operations which must be offered by the store for {@link MarkerTool}. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java index 1ffd2b7d4997b..ebc6fc9df9854 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java @@ -21,14 +21,15 @@ import java.io.IOException; import java.util.List; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; /** * Implement the marker tool operations by forwarding to the diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md index fa3347bc686c2..162f15951f5ca 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md @@ -15,10 +15,10 @@ # Upgrade S3A to AWS SDK V2: Changelog Note: This document is not meant to be committed as part of the final merge, and instead just serves -as a guide to help with reviewing the PR. +as a guide to help with reviewing the PR. This document tracks changes to S3A during the upgrade to AWS SDK V2. 
Once the upgrade -is complete, some of its content will be added to the existing document +is complete, some of its content will be added to the existing document [Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). This work is tracked in [HADOOP-18073](https://issues.apache.org/jira/browse/HADOOP-18073). @@ -47,7 +47,7 @@ This work is tracked in [HADOOP-18073](https://issues.apache.org/jira/browse/HAD * We now have two clients, a sync S3 Client and an async S3 Client. The async s3 client is required as the select operation is currently only supported on the async client. Once we are confident in the current set of changes, we will also be exploring moving other operations to the async client - as this could provide potential performance benefits. However those changes are not in the scope + as this could provide potential performance benefits. However those changes are not in the scope of this PR, and will be done separately. * The [createAwsConf](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java#L1190) method is now split into: @@ -56,11 +56,11 @@ method is now split into: createHttpClientBuilder* // sets max connections, connection timeout, socket timeout createProxyConfigurationBuilder // sets proxy config, defined in table below ``` - + The table below lists the configurations S3A was using and what they now map to. |SDK V1 |SDK V2 | -|--- |--- | +|---|---| |setMaxConnections |httpClientBuilder.maxConnections | |setProtocol |The protocol is now HTTPS by default, and can only be modified by setting an HTTP endpoint on the client builder. This is done when setting the endpoint in getS3Endpoint() | |setMaxErrorRetry |createRetryPolicyBuilder | @@ -193,11 +193,11 @@ in ` multipartUploadEncryptionParameters`. ### GetObject -* Previously, GetObject returned a `S3Object` response which exposed its content in a - `S3ObjectInputStream` through the `getObjectContent()` method. In SDK v2, the response is - directly a `ResponseInputStream` with the content, while the +* Previously, GetObject returned a `S3Object` response which exposed its content in a + `S3ObjectInputStream` through the `getObjectContent()` method. In SDK v2, the response is + directly a `ResponseInputStream` with the content, while the `GetObjectResponse` instance can be retrieved by calling `response()` on it. -* The above change simplifies managing the lifetime of the response input stream. In v1, +* The above change simplifies managing the lifetime of the response input stream. In v1, `S3AInputStream` had to keep a reference to the `S3Object` while holding the wrapped `S3ObjectInputStream`. When upgraded to SDK v2, it can simply wrap the new `ResponseInputStream`, which handles lifetime correctly. Same applies @@ -233,14 +233,14 @@ In order to adapt the new API in S3A, three new classes have been introduced in future returned by the select call and wraps the original publisher. This class provides a `toRecordsInputStream()` method which returns an input stream containing the results, reproducing the behaviour of the old `SelectRecordsInputStream`. -* `BlockingEnumeration`: an adapter which lazily requests new elements from the publisher and +* `BlockingEnumeration`: an adapter which lazily requests new elements from the publisher and exposes them through an `Enumeration` interface. 
- + ### CredentialsProvider @@ -258,7 +258,7 @@ In order to adapt the new API in S3A, three new classes have been introduced in as `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`, then map it to V2’s |`fs.s3a.aws.credentials.provider` value |Mapped to | -|--- |--- | +|---|---| |`com.amazonaws.auth.EnvironmentVariableCredentialsProvider` |`software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | |`com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | |`com.amazonaws.auth.InstanceProfileCredentialsProvider` |`org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | @@ -266,14 +266,14 @@ In order to adapt the new API in S3A, three new classes have been introduced in ### Auditing -The SDK v2 offers a new `ExecutionInterceptor` -[interface](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html) -which broadly replaces the `RequestHandler2` abstract class from v1. +The SDK v2 offers a new `ExecutionInterceptor` +[interface](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html) +which broadly replaces the `RequestHandler2` abstract class from v1. Switching to the new mechanism in S3A brings: * Simplification in `AWSAuditEventCallbacks` (and implementors) which can now extend `ExecutionInterceptor` -* "Registering" a Span with a request has moved from `requestCreated` to `beforeExecution` +* "Registering" a Span with a request has moved from `requestCreated` to `beforeExecution` (where an `ExecutionAttributes` instance is first available) * The ReferrerHeader is built and added to the http request in `modifyHttpRequest`, rather than in `beforeExecution`, where no http request is yet available @@ -283,10 +283,10 @@ Switching to the new mechanism in S3A brings: under "Interceptor Registration", which could make it redundant. In the Transfer Manager, `TransferListener` replaces `TransferStateChangeListener`. S3A code -has been updated and `AuditManagerS3A` implementations now provide an instance of the former to -switch to the active span, but registration of the new listeners is currently commented out because +has been updated and `AuditManagerS3A` implementations now provide an instance of the former to +switch to the active span, but registration of the new listeners is currently commented out because it causes an incompatibility issue with the internal logger, resulting in `NoSuchMethodError`s, -at least in the current TransferManager Preview release. +at least in the current TransferManager Preview release. ### Metric Collection @@ -295,46 +295,46 @@ at least in the current TransferManager Preview release. and collect the metrics from a `MetricCollection` object. The following table maps SDK v2 metrics to their equivalent in v1: -| v2 Metrics | com.amazonaws.util.AWSRequestMetrics.Field | Comment | +| v2 Metrics| com.amazonaws.util.AWSRequestMetrics.Field| Comment| |-------------------------------------------------------------|---------------------------------------------|--------------------------------| -| CoreMetric.RETRY_COUNT | HttpClientRetryCount | | -| CoreMetric.RETRY_COUNT | RequestCount | always HttpClientRetryCount+1 | -| HttpMetric.HTTP_STATUS_CODE with HttpStatusCode.THROTTLING | ThrottleException | to be confirmed | -| CoreMetric.API_CALL_DURATION | ClientExecuteTime | | -| CoreMetric.SERVICE_CALL_DURATION | HttpRequestTime | | -| CoreMetric.MARSHALLING_DURATION | RequestMarshallTime | | -| CoreMetric.SIGNING_DURATION | RequestSigningTime | | -| CoreMetric.UNMARSHALLING_DURATION | ResponseProcessingTime | to be confirmed | +| CoreMetric.RETRY_COUNT| HttpClientRetryCount|| +| CoreMetric.RETRY_COUNT| RequestCount| always HttpClientRetryCount+1| +| HttpMetric.HTTP_STATUS_CODE with HttpStatusCode.THROTTLING| ThrottleException| to be confirmed| +| CoreMetric.API_CALL_DURATION| ClientExecuteTime|| +| CoreMetric.SERVICE_CALL_DURATION| HttpRequestTime|| +| CoreMetric.MARSHALLING_DURATION| RequestMarshallTime|| +| CoreMetric.SIGNING_DURATION| RequestSigningTime|| +| CoreMetric.UNMARSHALLING_DURATION| ResponseProcessingTime| to be confirmed| Note that none of the timing metrics (`*_DURATION`) are currently collected in S3A.
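As a hedged illustration of the v2 mechanism (this is not S3A's `AwsStatisticsCollector`; `RetryCountPublisher` is a hypothetical name), a publisher can be registered on the client builder and read values such as `CoreMetric.RETRY_COUNT` from each published collection:

```java
import software.amazon.awssdk.core.metrics.CoreMetric;
import software.amazon.awssdk.metrics.MetricCollection;
import software.amazon.awssdk.metrics.MetricPublisher;
import software.amazon.awssdk.services.s3.S3Client;

/** Illustrative publisher: logs the retry count of every API call. */
public class RetryCountPublisher implements MetricPublisher {

  @Override
  public void publish(MetricCollection metricCollection) {
    // One MetricCollection is published per API call; RETRY_COUNT is the
    // v2 counterpart of v1's HttpClientRetryCount (see table above).
    metricCollection.metricValues(CoreMetric.RETRY_COUNT)
        .forEach(retries -> System.out.println("retries=" + retries));
  }

  @Override
  public void close() {
  }

  /** Register the publisher through the client override configuration. */
  public static S3Client newClientWithMetrics() {
    return S3Client.builder()
        .overrideConfiguration(o -> o.addMetricPublisher(new RetryCountPublisher()))
        .build();
  }
}
```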
### Exception Handling -The code to handle exceptions thrown by the SDK has been updated to reflect the changes in v2: +The code to handle exceptions thrown by the SDK has been updated to reflect the changes in v2: * `com.amazonaws.SdkBaseException` and `com.amazonaws.AmazonClientException` changes: - * These classes have been combined and replaced with + * These classes have been combined and replaced with `software.amazon.awssdk.core.exception.SdkException`. * `com.amazonaws.SdkClientException` changes: * This class has been replaced with `software.amazon.awssdk.core.exception.SdkClientException`. * This class now extends `software.amazon.awssdk.core.exception.SdkException`. * `com.amazonaws.AmazonServiceException` changes: - * This class has been replaced with + * This class has been replaced with `software.amazon.awssdk.awscore.exception.AwsServiceException`. - * This class now extends `software.amazon.awssdk.core.exception.SdkServiceException`, + * This class now extends `software.amazon.awssdk.core.exception.SdkServiceException`, a new exception type that extends `software.amazon.awssdk.core.exception.SdkException`. -See also the +See also the [SDK changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#3-exception-changes). ### Failure Injection -While using the SDK v1, failure injection was implemented in `InconsistentAmazonS3Client`, -which extended the S3 client. In SDK v2, reproducing this approach would not be straightforward, +since the default S3 client is an internal final class. Instead, the same fault injection strategy +is now performed by a `FailureInjectionInterceptor` (see +[ExecutionInterceptor](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html)) +registered on the default client by `InconsistentS3ClientFactory`. `InconsistentAmazonS3Client` has been removed. No changes to the user configuration are required.
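For review purposes, a sketch of what interceptor-based fault injection looks like with the v2 API; `ThrottlingInjectionInterceptor` and its fixed 10% injection rule are illustrative, not the actual `FailureInjectionInterceptor` logic:

```java
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.services.s3.S3Client;

/** Illustrative interceptor: fails a fixed fraction of requests. */
public class ThrottlingInjectionInterceptor implements ExecutionInterceptor {

  @Override
  public void beforeExecution(Context.BeforeExecution context,
      ExecutionAttributes executionAttributes) {
    if (Math.random() < 0.1) {
      // Surface a simulated throttling failure to the calling code.
      throw AwsServiceException.builder()
          .statusCode(503)
          .message("simulated throttling")
          .build();
    }
  }

  /** Register the interceptor through the client override configuration. */
  public static S3Client newFaultInjectingClient() {
    return S3Client.builder()
        .overrideConfiguration(
            o -> o.addExecutionInterceptor(new ThrottlingInjectionInterceptor()))
        .build();
  }
}
```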
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index c08108f096458..d1647fb3b2f54 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -20,6 +20,10 @@ import static org.apache.hadoop.fs.s3a.Constants.*; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.services.s3.S3Client; + import java.net.URI; import org.apache.hadoop.conf.Configuration; @@ -29,9 +33,6 @@ import org.junit.Rule; import org.junit.rules.ExpectedException; -import software.amazon.awssdk.awscore.exception.AwsErrorDetails; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.services.s3.S3Client; /** * Abstract base class for S3A unit tests using a mock S3 client and a null @@ -53,7 +54,7 @@ public abstract class AbstractS3AMockTest { public ExpectedException exception = ExpectedException.none(); protected S3AFileSystem fs; - protected S3Client s3V2; + protected S3Client s3; @Before public void setup() throws Exception { @@ -63,7 +64,7 @@ public void setup() throws Exception { // unset S3CSE property from config to avoid pathIOE.
conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); - s3V2 = fs.getAmazonS3V2ClientForTesting("mocking"); + s3 = fs.getAmazonS3V2ClientForTesting("mocking"); } public Configuration createConfiguration() { @@ -82,7 +83,7 @@ public Configuration createConfiguration() { } public S3Client getS3Client() { - return s3V2; + return s3; } @After diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 9e58dba5c9b28..6d1b10954e7c5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -32,11 +32,11 @@ import org.junit.Test; import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getCSVTestPath; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index cf7d822095801..1b65b5ded9ae9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -20,10 +20,6 @@ import java.util.List; -import org.assertj.core.api.Assertions; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.GetObjectAclRequest; import software.amazon.awssdk.services.s3.model.GetObjectAclResponse; @@ -31,6 +27,10 @@ import software.amazon.awssdk.services.s3.model.Grantee; import software.amazon.awssdk.services.s3.model.Permission; import software.amazon.awssdk.services.s3.model.Type; +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java index bcc37c8bfbbba..4f1dcdfd5238b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AClientSideEncryptionKms.java @@ -21,11 +21,11 @@ import java.io.IOException; import java.util.Map; -import com.amazonaws.services.s3.Headers; import org.assertj.core.api.Assertions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.AWSHeaders; import org.apache.hadoop.fs.s3a.impl.HeaderProcessing; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; @@ -69,14 +69,14 @@ protected void assertEncrypted(Path path) throws IOException { // Assert KeyWrap Algo assertEquals("Key wrap algo isn't same as expected", 
KMS_KEY_WRAP_ALGO, processHeader(fsXAttrs, - xAttrPrefix + Headers.CRYPTO_KEYWRAP_ALGORITHM)); + xAttrPrefix + AWSHeaders.CRYPTO_KEYWRAP_ALGORITHM)); // Assert content encryption algo for KMS, is present in the // materials description and KMS key ID isn't. String keyId = getS3EncryptionKey(getTestBucketName(getConfiguration()), getConfiguration()); Assertions.assertThat(processHeader(fsXAttrs, - xAttrPrefix + Headers.MATERIALS_DESCRIPTION)) + xAttrPrefix + AWSHeaders.MATERIALS_DESCRIPTION)) .describedAs("Materials Description should contain the content " + "encryption algo and should not contain the KMS keyID.") .contains(KMS_CONTENT_ENCRYPTION_ALGO) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index bc7b2ce3d1b06..ad2c16bae1d9e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -19,6 +19,10 @@ package org.apache.hadoop.fs.s3a; import com.amazonaws.ClientConfiguration; +import software.amazon.awssdk.core.client.config.SdkClientConfiguration; +import software.amazon.awssdk.core.client.config.SdkClientOption; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.reflect.FieldUtils; @@ -50,10 +54,6 @@ import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; -import software.amazon.awssdk.core.client.config.SdkClientConfiguration; -import software.amazon.awssdk.core.client.config.SdkClientOption; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.S3Configuration; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java index 48f3fdf91d323..856f8e7598bcd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java @@ -18,6 +18,9 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.S3Error; + import org.assertj.core.api.Assertions; import org.junit.Assume; @@ -32,8 +35,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; -import software.amazon.awssdk.services.s3.model.S3Error; import java.io.IOException; import java.util.ArrayList; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 1bf874b103716..28625e5755d18 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -25,14 +25,14 @@ import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; -import org.assertj.core.api.Assertions; -import org.junit.Assume; -import org.junit.Test; - 
import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.GetBucketEncryptionRequest; import software.amazon.awssdk.services.s3.model.GetBucketEncryptionResponse; import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonPathCapabilities; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java index 5dc38344de080..197811f39fb9b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMultipartUtils.java @@ -18,13 +18,13 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.services.s3.model.MultipartUpload; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.store.audit.AuditSpan; import org.junit.Test; -import software.amazon.awssdk.services.s3.model.MultipartUpload; import java.io.IOException; import java.util.HashSet; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java index a77e0ac705854..35bb709f659f9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java @@ -25,13 +25,13 @@ import java.time.OffsetDateTime; import java.util.concurrent.TimeUnit; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.Credentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.sts.StsClient; -import software.amazon.awssdk.services.sts.StsClientBuilder; -import software.amazon.awssdk.services.sts.model.Credentials; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 5f913f437f553..b7e55f01a371e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.net.URI; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.S3Client; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,9 +51,6 @@ import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.util.Progressable; -import software.amazon.awssdk.core.SdkRequest; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.services.s3.S3Client; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditor; import static 
org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTrackerFactory; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java index 8f1cca4ebf607..2b7620ddbddfc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MultipartTestUtils.java @@ -18,6 +18,10 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -27,11 +31,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.model.MultipartUpload; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; -import software.amazon.awssdk.services.s3.model.UploadPartResponse; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 8fde6395d594f..6191687c57e64 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -61,12 +61,12 @@ import org.apache.hadoop.util.functional.CallableRaisingIOE; import org.apache.hadoop.util.functional.FutureIO; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Assume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import java.io.Closeable; import java.io.File; @@ -674,7 +674,7 @@ public static MarshalledCredentials requestSessionCredentials( ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT), duration, new Invoker(new S3ARetryPolicy(conf), Invoker.LOG_EVENT), - bucket ); + bucket); sc.validate("requested session credentials: ", MarshalledCredentials.CredentialTypeRequired.SessionOnly); return sc; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index e8bcd30b5f160..8cf8d2db67d44 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -18,12 +18,12 @@ package org.apache.hadoop.fs.s3a; +import software.amazon.awssdk.regions.Region; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.regions.Region; import org.apache.hadoop.test.HadoopTestBase; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java index ab20762674521..0ac49812e4cb6 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java @@ -26,6 +26,10 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -34,10 +38,6 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.net.ConnectTimeoutException; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.core.exception.SdkClientException; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.services.s3.model.S3Exception; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Invoker.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 4c56cd5c1c0ff..7c3d6d8548bf0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -34,14 +34,14 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import org.apache.hadoop.util.Sets; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; +import org.apache.hadoop.util.Sets; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index fab0b67b376a7..a89f1744fd2f9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -27,15 +27,16 @@ import java.net.URI; import java.util.Date; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.junit.Test; import org.mockito.ArgumentMatcher; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; /** * deleteOnExit test for S3A. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java index c41752fb2b0b7..fd186e51427cc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java @@ -35,15 +35,16 @@ import java.util.concurrent.ExecutionException; import java.util.function.Consumer; -import org.junit.Test; - -import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; - import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.model.S3Exception; +import org.junit.Test; + +import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; + + import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 541f3b0486191..1a2a21a6e5111 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -29,13 +29,6 @@ import java.util.Date; import java.util.List; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; - -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.junit.Test; -import org.mockito.ArgumentMatcher; - import software.amazon.awssdk.services.s3.model.CommonPrefix; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; @@ -45,6 +38,14 @@ import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; import software.amazon.awssdk.services.s3.model.S3Object; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; + +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Test; +import org.mockito.ArgumentMatcher; + + /** * S3A tests for getFileStatus using mock S3 client. 
*/ @@ -57,8 +58,7 @@ public void testFile() throws Exception { HeadObjectResponse objectMetadata = HeadObjectResponse.builder().contentLength(1L).lastModified(new Date(2L).toInstant()) .build(); - when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) - .thenReturn(objectMetadata); + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))).thenReturn(objectMetadata); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -75,14 +75,14 @@ public void testFile() throws Exception { public void testFakeDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); String keyDir = key + "/"; List s3Objects = new ArrayList<>(1); s3Objects.add(S3Object.builder().key(keyDir).size(0L).build()); ListObjectsV2Response listObjectsV2Response = ListObjectsV2Response.builder().contents(s3Objects).build(); - when(s3V2.listObjectsV2(argThat( + when(s3.listObjectsV2(argThat( matchListV2Request(BUCKET, keyDir)) )).thenReturn(listObjectsV2Response); FileStatus stat = fs.getFileStatus(path); @@ -95,9 +95,9 @@ public void testFakeDirectory() throws Exception { public void testImplicitDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3V2.headObject(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/")) )).thenThrow(NOT_FOUND); setupListMocks(Collections.singletonList(CommonPrefix.builder().prefix("dir/").build()), @@ -116,9 +116,9 @@ public void testImplicitDirectory() throws Exception { public void testRoot() throws Exception { Path path = new Path("/"); String key = path.toUri().getPath().substring(1); - when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3V2.headObject(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -133,9 +133,9 @@ public void testRoot() throws Exception { public void testNotFound() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); - when(s3V2.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) + when(s3.headObject(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); - when(s3V2.headObject(argThat( + when(s3.headObject(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); setupListMocks(Collections.emptyList(), Collections.emptyList()); @@ -150,14 +150,14 @@ private void setupListMocks(List prefixes, .commonPrefixes(prefixes) .contents(s3Objects) .build(); - when(s3V2.listObjects(any(ListObjectsRequest.class))).thenReturn(v1Response); + when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(v1Response); // V2 list API mock ListObjectsV2Response v2Result = ListObjectsV2Response.builder() .commonPrefixes(prefixes) .contents(s3Objects) .build(); - when(s3V2.listObjectsV2( + when(s3.listObjectsV2( 
any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))).thenReturn( v2Result); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java index 58f045828c77c..da1284343da9f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java @@ -25,6 +25,12 @@ import java.nio.charset.StandardCharsets; import java.util.concurrent.CompletableFuture; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import org.junit.Test; import org.apache.commons.io.IOUtils; @@ -33,12 +39,6 @@ import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.util.functional.CallableRaisingIOE; -import software.amazon.awssdk.awscore.exception.AwsErrorDetails; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.core.ResponseInputStream; -import software.amazon.awssdk.http.AbortableInputStream; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; import static java.lang.Math.min; import static org.apache.hadoop.util.functional.FutureIO.eval; @@ -133,7 +133,7 @@ private S3AInputStream.InputStreamCallbacks getMockedInputStreamCallback() { getMockedInputStream(objectResponse, true), getMockedInputStream(objectResponse, true), getMockedInputStream(objectResponse, false) - }; + }; return new S3AInputStream.InputStreamCallbacks() { private Integer mockedS3ObjectIndex = 0; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java index 43f55a2a7c48d..643db02087b46 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AUnbuffer.java @@ -18,11 +18,6 @@ package org.apache.hadoop.fs.s3a; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; - -import org.junit.Test; - import software.amazon.awssdk.core.ResponseInputStream; import software.amazon.awssdk.http.AbortableInputStream; import software.amazon.awssdk.services.s3.model.GetObjectRequest; @@ -30,6 +25,12 @@ import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; + +import org.junit.Test; + + import java.io.IOException; import java.io.InputStream; import java.time.Instant; @@ -60,7 +61,7 @@ public void testUnbuffer() throws IOException { .lastModified(Instant.ofEpochMilli(2L)) .eTag("mock-etag") .build(); - when(s3V2.headObject((HeadObjectRequest) any())).thenReturn(objectMetadata); + when(s3.headObject((HeadObjectRequest) any())).thenReturn(objectMetadata); // Create mock ResponseInputStream and GetObjectResponse for open() GetObjectResponse objectResponse = 
GetObjectResponse.builder() @@ -75,7 +76,7 @@ public void testUnbuffer() throws IOException { ResponseInputStream getObjectResponseInputStream = new ResponseInputStream(objectResponse, AbortableInputStream.create(objectStream, () -> {})); - when(s3V2.getObject((GetObjectRequest) any())).thenReturn(getObjectResponseInputStream); + when(s3.getObject((GetObjectRequest) any())).thenReturn(getObjectResponseInputStream); // Call read and then unbuffer FSDataInputStream stream = fs.open(path); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index 55b45a0399dd1..66d9032e858eb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -18,10 +18,6 @@ package org.apache.hadoop.fs.s3a; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.model.CopyObjectRequest; @@ -29,6 +25,10 @@ import software.amazon.awssdk.services.s3.model.CopyObjectResult; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java index a629f1c478a79..3f3b3149d426e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java @@ -26,6 +26,12 @@ import java.util.function.Consumer; import java.util.stream.Collectors; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import org.junit.After; import org.junit.Before; import org.slf4j.Logger; @@ -40,13 +46,6 @@ import org.apache.hadoop.fs.store.audit.AuditSpan; import org.apache.hadoop.test.AbstractHadoopTestBase; -import software.amazon.awssdk.core.interceptor.ExecutionAttributes; -import software.amazon.awssdk.core.interceptor.InterceptorContext; -import software.amazon.awssdk.http.SdkHttpMethod; -import software.amazon.awssdk.http.SdkHttpRequest; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; - import static org.apache.hadoop.fs.s3a.Statistic.INVOCATION_GET_FILE_STATUS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.UNAUDITED_OPERATION; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.createIOStatisticsStoreForAuditing; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java index 252500c0c1056..4f476604332b1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditIntegration.java @@ -22,6 +22,12 @@ import java.nio.file.AccessDeniedException; import java.util.List; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.http.SdkHttpMethod; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -36,12 +42,6 @@ import org.apache.hadoop.service.Service; import org.apache.hadoop.test.AbstractHadoopTestBase; -import software.amazon.awssdk.core.interceptor.ExecutionAttributes; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; -import software.amazon.awssdk.core.interceptor.InterceptorContext; -import software.amazon.awssdk.http.SdkHttpMethod; -import software.amazon.awssdk.http.SdkHttpRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.attachSpanToRequest; @@ -199,7 +199,6 @@ public void testSingleExecutionInterceptor() throws Throwable { // test the basic pre-request sequence while avoiding // the complexity of recreating the full sequence // (and probably getting it wrong) - // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html interceptor.beforeExecution(context, attributes); interceptor.modifyRequest(context, attributes); interceptor.beforeMarshalling(context, attributes); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java index af0d397e58ff2..e5e4afc434c8e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestAuditSpanLifecycle.java @@ -20,13 +20,13 @@ import java.util.List; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import org.junit.Before; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.store.audit.AuditSpan; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.noopAuditConfig; import static org.assertj.core.api.Assertions.assertThat; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java index 59f35b6194965..991379e435d91 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.regex.Matcher; +import software.amazon.awssdk.http.SdkHttpRequest; import org.junit.Before; import 
org.junit.Test; import org.slf4j.Logger; @@ -36,7 +37,6 @@ import org.apache.hadoop.fs.store.audit.HttpReferrerAuditHeader; import org.apache.hadoop.security.UserGroupInformation; -import software.amazon.awssdk.http.SdkHttpRequest; import static org.apache.hadoop.fs.audit.AuditConstants.DELETE_KEYS_SIZE; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java index 0af43a1c1db6c..0059e5b6c5392 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestLoggingAuditor.java @@ -18,6 +18,12 @@ package org.apache.hadoop.fs.s3a.audit; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.InterceptorContext; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -27,12 +33,6 @@ import org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor; import org.apache.hadoop.fs.store.audit.AuditSpan; -import software.amazon.awssdk.core.interceptor.ExecutionAttributes; -import software.amazon.awssdk.core.interceptor.InterceptorContext; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; -import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; -import software.amazon.awssdk.transfer.s3.progress.TransferListener; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.loggingAuditConfig; import static org.assertj.core.api.Assertions.assertThat; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 5c638f6a9ce19..70d91ba7b113f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -26,14 +26,14 @@ import java.util.List; import java.util.stream.IntStream; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.services.sts.model.StsException; import com.fasterxml.jackson.core.JsonProcessingException; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.services.sts.model.StsException; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java index 9ad7b26cb6512..b9d547635f7f3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java @@ -21,9 +21,9 @@ import java.net.URI; import java.net.URISyntaxException; +import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.junit.Before; import org.junit.Test; -import software.amazon.awssdk.auth.credentials.AwsCredentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java index 54a3a337ede25..dd513055b91ac 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java @@ -26,14 +26,14 @@ import java.net.URI; import java.nio.file.AccessDeniedException; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -332,7 +332,7 @@ public void testDelegatedFileSystem() throws Throwable { // force a probe of the remote FS to make sure its endpoint is valid // TODO: Previously a call to getObjectMetadata for a base path, ie with an empty key would // return some metadata. (bucket region, content type). headObject() fails without a key, check - // how this can be fixed. + // how this can be fixed. 
// fs.getObjectMetadata(new Path("/")); readLandsatMetadata(fs); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java index 629538a379638..7f13cb3a4d161 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java @@ -22,12 +22,12 @@ import java.io.IOException; import java.net.URI; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.hamcrest.Matchers; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index 811eacfc98f65..e64822d8c8802 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -29,6 +29,20 @@ import java.util.UUID; import java.util.stream.Collectors; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; +import software.amazon.awssdk.services.s3.model.MultipartUpload; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.util.Lists; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.junit.AfterClass; @@ -66,20 +80,6 @@ import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.test.HadoopTestBase; -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; -import 
software.amazon.awssdk.services.s3.model.DeleteObjectRequest; -import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; -import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; -import software.amazon.awssdk.services.s3.model.MultipartUpload; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; -import software.amazon.awssdk.services.s3.model.UploadPartResponse; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java index a91f70c4077be..f96cf97ebd7f4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestDirectoryCommitterScale.java @@ -27,6 +27,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import software.amazon.awssdk.services.s3.model.CompletedPart; + import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.assertj.core.api.Assertions; import org.junit.AfterClass; @@ -37,7 +39,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.CompletedPart; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java index 5df2a6563db15..71ed0b6891a58 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java @@ -31,6 +31,9 @@ import java.util.UUID; import java.util.stream.Collectors; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import org.apache.hadoop.util.Sets; import org.assertj.core.api.Assertions; @@ -66,9 +69,6 @@ import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java index 2f86ae75e8666..6ace7462e78a6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java @@ -24,6 +24,7 @@ import java.util.Set; import java.util.UUID; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import 
org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Sets; import org.assertj.core.api.Assertions; @@ -35,7 +36,6 @@ import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.mapreduce.JobContext; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; import static org.apache.hadoop.test.LambdaTestUtils.intercept; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java index ce439d89d7f4e..73db942973211 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestRenameDeleteRace.java @@ -23,12 +23,13 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import software.amazon.awssdk.core.exception.SdkException; + import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.exception.SdkException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java index f69870afe50c9..b521a81a94942 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java @@ -66,7 +66,7 @@ public void testXAttrRoot() throws Throwable { describe("Test xattr on root"); // TODO: Previously a call to getObjectMetadata for a base path, i.e. with an empty key, would // return some metadata (bucket region, content type). headObject() fails without a key, check - // how this can be fixed.
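// (Sketch of one option, an assumption rather than anything implemented in this patch:
// HeaderProcessing could special-case the root/empty key and publish bucket-level
// attributes from a HeadBucket response instead of issuing headObject().)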
Path root = new Path("/"); S3AFileSystem fs = getFileSystem(); Map<String, byte[]> xAttrs = verifyMetrics( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java index 7883fa4c83f39..9d026fd90ee50 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java @@ -26,12 +26,12 @@ import java.util.List; import java.util.Map; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.assertj.core.api.Assertions; import org.assertj.core.util.Lists; import org.junit.Before; import org.junit.Test; -import software.amazon.awssdk.services.s3.model.CopyObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.MockS3AFileSystem; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index 92481388e876d..b16204a260b09 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -22,15 +22,15 @@ import java.util.ArrayList; import java.util.concurrent.atomic.AtomicLong; +import software.amazon.awssdk.awscore.AwsRequest; +import software.amazon.awssdk.core.SdkRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.awscore.AwsRequest; -import software.amazon.awssdk.core.SdkRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; -import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java index 85970a65887f4..7042737b31085 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestSDKStreamDrainer.java @@ -21,12 +21,12 @@ import java.io.IOException; import java.io.InputStream; +import software.amazon.awssdk.http.Abortable; import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.hadoop.test.HadoopTestBase; -import software.amazon.awssdk.http.Abortable; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DRAIN_BUFFER_SIZE; import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_INPUT_STREAM_STATISTICS; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index ce066b0b3ecc7..9555e8316380c 100644 ---
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -27,6 +27,10 @@ import java.util.concurrent.Callable; import java.util.stream.Collectors; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -45,10 +49,6 @@ import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.store.audit.AuditSpan; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java index 0c1d402305c0e..5fbbc3a127997 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/MockS3ARemoteObject.java @@ -23,16 +23,16 @@ import java.io.IOException; import java.util.concurrent.CompletableFuture; -import org.apache.hadoop.fs.impl.prefetch.Validate; -import org.apache.hadoop.fs.s3a.S3AInputStream; -import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; -import org.apache.hadoop.util.functional.CallableRaisingIOE; - import software.amazon.awssdk.core.ResponseInputStream; import software.amazon.awssdk.http.AbortableInputStream; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import org.apache.hadoop.fs.impl.prefetch.Validate; +import org.apache.hadoop.fs.s3a.S3AInputStream; +import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; +import org.apache.hadoop.util.functional.CallableRaisingIOE; + /** * A mock s3 file with some fault injection. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java index cb01387b21736..2ceaec2477393 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/prefetch/S3APrefetchFakes.java @@ -31,6 +31,11 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; @@ -57,10 +62,6 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.functional.CallableRaisingIOE; -import software.amazon.awssdk.core.ResponseInputStream; -import software.amazon.awssdk.http.AbortableInputStream; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; import static org.apache.hadoop.fs.s3a.Constants.BUFFER_DIR; import static org.apache.hadoop.fs.s3a.Constants.HADOOP_TMP_DIR; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java index 7c3398ce561d7..b586fb7dbabc6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ILoadTestS3ABulkDeleteThrottling.java @@ -29,6 +29,7 @@ import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.assertj.core.api.Assertions; @@ -51,7 +52,6 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.concurrent.HadoopExecutors; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.BULK_DELETE_PAGE_SIZE; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java index 2d380a9aef6d0..db9093ea18414 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java @@ -42,8 +42,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectResponse; import java.io.IOException; import java.io.InputStream; @@ -53,6 +51,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import 
software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; + import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_KEEP; import static org.apache.hadoop.fs.s3a.Statistic.*; From aa1cf19ffaa0c1b7469e79dab5ac4f6b2cd02eba Mon Sep 17 00:00:00 2001 From: Alessandro Passaro Date: Mon, 28 Nov 2022 11:36:20 +0000 Subject: [PATCH 03/13] Move MultiObjectDeleteException to impl --- .../src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 1 + .../src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java | 1 + .../hadoop/fs/s3a/{ => impl}/MultiObjectDeleteException.java | 4 +++- .../org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java | 1 - .../main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java | 2 +- .../org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java | 2 +- .../apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java | 1 + .../apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java | 1 - .../apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java | 2 +- 10 files changed, 10 insertions(+), 7 deletions(-) rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/{ => impl}/MultiObjectDeleteException.java (96%) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 324e03ff8a465..66ba106297842 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -136,6 +136,7 @@ import org.apache.hadoop.fs.s3a.impl.InternalConstants; import org.apache.hadoop.fs.s3a.impl.ListingOperationCallbacks; import org.apache.hadoop.fs.s3a.impl.MkdirOperation; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OpenFileSupport; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 07cbfd268278f..d44b8cc82829c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -46,6 +46,7 @@ import org.apache.hadoop.fs.s3a.audit.AuditIntegration; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.NetworkBinding; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java similarity index 96% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java index 4166bcea90d37..6082c2f08daff 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/MultiObjectDeleteException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3a; +package org.apache.hadoop.fs.s3a.impl; import java.io.IOException; import java.nio.file.AccessDeniedException; @@ -29,6 +29,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.s3a.AWSS3IOException; +import org.apache.hadoop.fs.s3a.S3AFileSystem; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_200_OK; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java index 70c6165c635cb..e0d9c7c6aada7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index cd4c8e585ef68..ef8413ccf0a64 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -54,9 +54,9 @@ import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.shell.CommandFormat; import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.ExitUtil; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java index 09a22ddfe018b..7aaec40a86805 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java @@ -27,9 +27,9 @@ import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java index ebc6fc9df9854..d7c77feed1083 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperationsImpl.java @@ -26,8 +26,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java index 856f8e7598bcd..a741b11b0ce47 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.statistics.StoreStatisticNames; import org.apache.hadoop.fs.store.audit.AuditSpan; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java index 3a91e4026a872..641bf7a2d074d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java @@ -39,7 +39,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.DurationInfo; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java index 0d74ff52f46f2..004e15676a04a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/MinimalOperationCallbacks.java @@ -28,11 +28,11 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AReadOpContext; import org.apache.hadoop.fs.s3a.S3ObjectAttributes; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; /** From 1ab7e6827d02f679b595c5616d25e8292236b1d1 Mon Sep 17 00:00:00 2001 From: Alessandro Passaro Date: Mon, 28 Nov 2022 11:51:48 +0000 Subject: [PATCH 04/13] Reinstate old constants --- .../org/apache/hadoop/fs/s3a/impl/InternalConstants.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 14e1fdc8cb5eb..7af82f70aebf6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -128,9 +128,17 @@ private InternalConstants() { /** 403 status code: Forbidden. */ public static final int SC_403_FORBIDDEN = 403; + /** 403 error code. */ + @Deprecated + public static final int SC_403 = SC_403_FORBIDDEN; + /** 404 status code: Not Found. */ public static final int SC_404_NOT_FOUND = 404; + /** 404 error code. */ + @Deprecated + public static final int SC_404 = SC_404_NOT_FOUND; + /** 405 status code: Method Not Allowed. */ public static final int SC_405_METHOD_NOT_ALLOWED = 405; From 0c2fd282526685af2bcd00550e644dea7b0c4c68 Mon Sep 17 00:00:00 2001 From: Alessandro Passaro Date: Mon, 28 Nov 2022 14:43:27 +0000 Subject: [PATCH 05/13] Move TransferManager initialization to ClientFactory --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 34 ++++++++-- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 34 ++-------- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 66 ++++++++++++++++++- .../hadoop/fs/s3a/MockS3ClientFactory.java | 9 +++ 4 files changed, 106 insertions(+), 37 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 64c62f8876f93..098ac9e026959 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -64,6 +64,7 @@ import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -123,9 +124,6 @@ public class DefaultS3ClientFactory extends Configured /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); - /** Bucket name. */ - private String bucket; - /** * Create the client by preparing the AwsConf configuration * and then invoking {@code buildAmazonS3Client()}. 
@@ -136,7 +134,7 @@ public AmazonS3 createS3Client( final URI uri, final S3ClientCreationParameters parameters) throws IOException { Configuration conf = getConf(); - bucket = uri.getHost(); + String bucket = uri.getHost(); final ClientConfiguration awsConf = S3AUtils .createAwsConf(conf, bucket, @@ -172,6 +170,7 @@ public AmazonS3 createS3Client( .equals(encryptionMethods.getMethod())) { return buildAmazonS3EncryptionClient( awsConf, + bucket, parameters); } else { return buildAmazonS3Client( @@ -192,7 +191,7 @@ public S3Client createS3ClientV2( final S3ClientCreationParameters parameters) throws IOException { Configuration conf = getConf(); - bucket = uri.getHost(); + String bucket = uri.getHost(); ApacheHttpClient.Builder httpClientBuilder = AWSClientConfig .createHttpClientBuilder(conf) @@ -208,7 +207,7 @@ public S3AsyncClient createS3AsyncClient( final S3ClientCreationParameters parameters) throws IOException { Configuration conf = getConf(); - bucket = uri.getHost(); + String bucket = uri.getHost(); NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig .createAsyncHttpClientBuilder(conf) .proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket)); @@ -217,6 +216,26 @@ public S3AsyncClient createS3AsyncClient( .build(); } + @Override + public S3TransferManager createS3TransferManager( + final URI uri, + final S3ClientCreationParameters parameters) + throws IOException { + Configuration conf = getConf(); + String bucket = uri.getHost(); + Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket, + parameters.getCredentialSet()); + return S3TransferManager.builder() + .s3ClientConfiguration(clientConfiguration -> + clientConfiguration + .minimumPartSizeInBytes(parameters.getMinimumPartSize()) + .credentialsProvider(parameters.getCredentialSet()) + .region(region)) + .transferConfiguration(transferConfiguration -> + transferConfiguration.executor(parameters.getTransferManagerExecutor())) + .build(); + } + /** * Configure a sync or async S3 client builder. * This method handles all shared configuration. @@ -297,13 +316,14 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( * {@link AmazonS3EncryptionV2} if CSE is enabled. * * @param awsConf AWS configuration. + * @param bucket bucket name. * @param parameters parameters. - * * @return new AmazonS3 client. * @throws IOException if lookupPassword() has any problem. 
*/ protected AmazonS3 buildAmazonS3EncryptionClient( final ClientConfiguration awsConf, + final String bucket, final S3ClientCreationParameters parameters) throws IOException { AmazonS3 client; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 66ba106297842..d29a4584103c2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -995,17 +995,14 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false)) .withUserAgentSuffix(uaSuffix) .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) - .withExecutionInterceptors(auditManager.createExecutionInterceptors()); + .withExecutionInterceptors(auditManager.createExecutionInterceptors()) + .withMinimumPartSize(partSize) + .withTransferManagerExecutor(unboundedThreadPool); - s3Client = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) - .createS3ClientV2(getUri(), - parameters); - - s3AsyncClient = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) - .createS3AsyncClient(getUri(), - parameters); - - initTransferManager(); + S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); + s3Client = clientFactory.createS3ClientV2(getUri(), parameters); + s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters); + transferManager = clientFactory.createS3TransferManager(getUri(), parameters); } /** @@ -1179,23 +1176,6 @@ public EncryptionSecrets getEncryptionSecrets() { return encryptionSecrets; } - private void initTransferManager() { - // TODO: move to client factory? - transferManager = S3TransferManager.builder() - .s3ClientConfiguration(clientConfiguration -> - // TODO: Temporarily using EU_WEST_1 as the region, ultimately this can maybe moved to - // the DefaultS3ClientFactory and use the region resolution logic there. Wait till we - // finalise region logic before making any changes here. Also add other - // configuration options? 
- clientConfiguration - .minimumPartSizeInBytes(partSize) - .credentialsProvider(credentials) - .region(Region.EU_WEST_1)) - .transferConfiguration(transferConfiguration -> - transferConfiguration.executor(unboundedThreadPool)) // TODO: double-check - .build(); - } - private void initCannedAcls(Configuration conf) { String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL); if (!cannedACLName.isEmpty()) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 1f1344677dbf4..c046f1e12633d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -24,19 +24,20 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Executor; +import com.amazonaws.monitoring.MonitoringListener; +import com.amazonaws.services.s3.AmazonS3; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; -import com.amazonaws.monitoring.MonitoringListener; -import com.amazonaws.services.s3.AmazonS3; +import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk; - import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; /** @@ -93,6 +94,17 @@ S3Client createS3ClientV2(URI uri, S3AsyncClient createS3AsyncClient(URI uri, S3ClientCreationParameters parameters) throws IOException; + /** + * Creates a new {@link S3TransferManager}. + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return S3 transfer manager + * @throws IOException on any IO problem + */ + S3TransferManager createS3TransferManager(URI uri, + S3ClientCreationParameters parameters) throws IOException; + /** * Settings for the S3 Client. * Implemented as a class to pass in so that adding @@ -154,6 +166,16 @@ final class S3ClientCreationParameters { */ private URI pathUri; + /** + * Minimum part size for transfer parts. + */ + private long minimumPartSize; + + /** + * Executor that the transfer manager will use to execute background tasks. + */ + private Executor transferManagerExecutor; + /** * List of execution interceptors to include in the chain * of interceptors in the SDK. @@ -324,5 +346,43 @@ public S3ClientCreationParameters withPathUri( pathUri = value; return this; } + + /** + * Get the minimum part size for transfer parts. + * @return part size + */ + public long getMinimumPartSize() { + return minimumPartSize; + } + + /** + * Set the minimum part size for transfer parts. + * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withMinimumPartSize( + final long value) { + minimumPartSize = value; + return this; + } + + /** + * Get the executor that the transfer manager will use to execute background tasks. + * @return the executor + */ + public Executor getTransferManagerExecutor() { + return transferManagerExecutor; + } + + /** + * Set the executor that the transfer manager will use to execute background tasks.
+ * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withTransferManagerExecutor( + final Executor value) { + transferManagerExecutor = value; + return this; + } } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index ea5544930d211..e16a99be8e7c8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -20,6 +20,7 @@ import static org.mockito.Mockito.*; +import java.io.IOException; import java.net.URI; import java.util.ArrayList; @@ -28,6 +29,7 @@ import com.amazonaws.services.s3.model.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; /** * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3} @@ -64,4 +66,11 @@ public S3AsyncClient createS3AsyncClient(URI uri, final S3ClientCreationParamete S3AsyncClient s3 = mock(S3AsyncClient.class); return s3; } + + @Override + public S3TransferManager createS3TransferManager(URI uri, S3ClientCreationParameters parameters) + throws IOException { + S3TransferManager tm = mock(S3TransferManager.class); + return tm; + } } From 38dff867b9d942830a222a493a45b686d2249705 Mon Sep 17 00:00:00 2001 From: Alessandro Passaro Date: Mon, 5 Dec 2022 11:39:28 +0000 Subject: [PATCH 06/13] Add unit tests for BlockingEnumeration --- .../fs/s3a/select/BlockingEnumeration.java | 11 +- .../hadoop/fs/s3a/select/StreamPublisher.java | 89 ++++++++ .../s3a/select/TestBlockingEnumeration.java | 200 ++++++++++++++++++ 3 files changed, 297 insertions(+), 3 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java index c09acc8ba2220..bc47db47c767d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java @@ -96,13 +96,18 @@ public boolean hasMoreElements() { } catch (InterruptedException e) { current = new Signal<>(e); subscription.thenAccept(Subscription::cancel); + Thread.currentThread().interrupt(); } } if (current.error != null) { - if (current.error instanceof SdkException) { - throw (SdkException)current.error; + Throwable error = current.error; + current = END_SIGNAL; + if (error instanceof Error) { + throw (Error)error; + } else if (error instanceof SdkException) { + throw (SdkException)error; } else { - throw SdkException.create("Unexpected error", current.error); + throw SdkException.create("Unexpected error", error); } } return current != END_SIGNAL; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java new file mode 100644 index 0000000000000..c770b8897338f --- /dev/null +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.Iterator; +import java.util.concurrent.Executor; +import java.util.stream.Stream; + +import org.reactivestreams.Subscriber; +import org.reactivestreams.Subscription; +import software.amazon.awssdk.core.async.SdkPublisher; + +/** + * Publisher used to test the handling of asynchronous responses. + * @param <T> The type of published elements. + */ +final class StreamPublisher<T> implements SdkPublisher<T> { + private final Executor executor; + private final Iterator<T> iterator; + private Boolean done = false; + + public StreamPublisher(Stream<T> data, Executor executor) { + this.iterator = data.iterator(); + this.executor = executor; + } + + public StreamPublisher(Stream<T> data) { + this(data, Runnable::run); + } + + @Override + public void subscribe(Subscriber<? super T> subscriber) { + subscriber.onSubscribe(new Subscription() { + @Override + public void request(long n) { + if (done) { + return; + } + + if (n < 1) { + done = true; + executor.execute(() -> subscriber.onError(new IllegalArgumentException())); + return; + } + + for (long i = 0; i < n; i++) { + final T value; + try { + synchronized (iterator) { + value = iterator.hasNext() ? iterator.next() : null; + } + } catch (Throwable e) { + executor.execute(() -> subscriber.onError(e)); + break; + } + + if (value == null) { + done = true; + executor.execute(subscriber::onComplete); + break; + } else { + executor.execute(() -> subscriber.onNext(value)); + } + } + } + + @Override + public void cancel() { + done = true; + } + }); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java new file mode 100644 index 0000000000000..43bdcb062f0a3 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.junit.Assert; +import org.junit.Test; +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; + +/** + * Unit tests for {@link BlockingEnumeration}. + */ +public final class TestBlockingEnumeration extends Assert { + + @Test + public void containsElement() { + SdkPublisher<String> publisher = new StreamPublisher<>(Stream.of("foo")); + + BlockingEnumeration<String> enumeration = + new BlockingEnumeration<>(publisher, 1); + + assertTrue(enumeration.hasMoreElements()); + assertEquals("foo", enumeration.nextElement()); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void containsInjectedElement() { + SdkPublisher<String> publisher = new StreamPublisher<>(Stream.of("foo")); + + BlockingEnumeration<String> enumeration = + new BlockingEnumeration<>(publisher, 1, "bar"); + + assertTrue(enumeration.hasMoreElements()); + assertEquals("bar", enumeration.nextElement()); + assertTrue(enumeration.hasMoreElements()); + assertEquals("foo", enumeration.nextElement()); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void throwsExceptionOnFirstElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .map(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void throwsExceptionAfterInjectedElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1, 99); + assertTrue(enumeration.hasMoreElements()); + assertEquals(99, enumeration.nextElement().intValue()); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void throwsNonSdkException() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw new RuntimeException("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + SdkException exception = Assert.assertThrows(SdkException.class, enumeration::hasMoreElements); + assertEquals(RuntimeException.class, exception.getCause().getClass()); + } + + @Test + public void throwsError() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + throw new Error("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(Error.class, enumeration::hasMoreElements); + } + + @Test + public void throwsExceptionOnSecondElement() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .peek(i -> { + if (i == 1) { + throw SdkException.create("error!", null); + } + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertTrue(enumeration.hasMoreElements()); + assertEquals(0, enumeration.nextElement().intValue()); + assertThrows(SdkException.class, enumeration::hasMoreElements); + } + + @Test + public void noMoreElementsAfterThrow() { + SdkPublisher<Integer> publisher = new StreamPublisher<>( + Stream.of(0, 1) + .map(i -> { + throw SdkException.create("error!", null); + }), + Executors.newSingleThreadExecutor()); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, 1); + assertThrows(SdkException.class, enumeration::hasMoreElements); + assertFalse(enumeration.hasMoreElements()); + } + + @Test + public void buffersOnSameThread() { + verifyBuffering(10, 3, Runnable::run); + } + + @Test + public void publisherOnDifferentThread() { + verifyBuffering(5, 1, Executors.newSingleThreadExecutor()); + } + + @Test + public void publisherOnDifferentThreadWithBuffer() { + verifyBuffering(30, 10, Executors.newSingleThreadExecutor()); + } + + private static void verifyBuffering(int length, int bufferSize, Executor executor) { + AtomicInteger emitted = new AtomicInteger(); + SdkPublisher<Integer> publisher = new StreamPublisher<>( + IntStream.range(0, length).boxed().peek(i -> emitted.incrementAndGet()), + executor); + + BlockingEnumeration<Integer> enumeration = + new BlockingEnumeration<>(publisher, bufferSize); + + int pulled = 0; + while (true) { + try { + int expected = Math.min(length, pulled + bufferSize); + if (expected != emitted.get()) { + Thread.sleep(10); + } + assertEquals(expected, emitted.get()); + } catch (InterruptedException e) { + fail("Interrupted: " + e); + } + + if (!enumeration.hasMoreElements()) { + break; + } + + int i = enumeration.nextElement(); + assertEquals(pulled, i); + pulled++; + } + } +} From 0338fd89f6d822b91429d819d034adfb68d88dbd Mon Sep 17 00:00:00 2001 From: Alessandro Passaro Date: Mon, 5 Dec 2022 11:39:43 +0000 Subject: [PATCH 07/13] Add unit tests for SelectEventStreamPublisher --- .../TestSelectEventStreamPublisher.java | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java new file mode 100644 index 0000000000000..faf32fe4fd94d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.select; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Stream; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.core.async.SdkPublisher; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.http.AbortableInputStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; +import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; + +/** + * Unit tests for {@link SelectEventStreamPublisher}. + */ +@RunWith(Parameterized.class) +public final class TestSelectEventStreamPublisher extends Assert { + + @Parameterized.Parameters(name = "threading-{0}") + public static Collection<Object[]> params() { + return Arrays.asList(new Object[][]{ + {"main"}, + {"background"} + }); + } + + private final String threading; + + public TestSelectEventStreamPublisher(String threading) { + this.threading = threading; + } + + private Executor createExecutor() { + if (threading.equals("main")) { + return Runnable::run; + } else if (threading.equals("background")) { + return Executors.newSingleThreadExecutor(); + } else { + throw new IllegalArgumentException("Unknown: " + threading); + } + } + + @Test + public void emptyRecordsInputStream() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromByteArray(new byte[0])) + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + assertEquals(-1, inputStream.read()); + } + } + + @Test + public void multipleRecords() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + String result = readAll(inputStream); + assertEquals("foobar", result); + } + } + + @Test + public void skipsOtherEvents() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.progressBuilder() + .build(), + SelectObjectContentEventStream.statsBuilder() + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build(), + SelectObjectContentEventStream.endBuilder() + .build())); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + String result = readAll(inputStream); + assertEquals("foobar", result); + } + } + + @Test + public void callsOnEndEvent() throws IOException
{ + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.endBuilder() + .build())); + + AtomicBoolean endEvent = new AtomicBoolean(false); + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> endEvent.set(true))) { + String result = readAll(inputStream); + assertEquals("foo", result); + } + + assertTrue(endEvent.get()); + } + + @Test + public void handlesErrors() throws IOException { + SelectEventStreamPublisher selectEventStreamPublisher = + createSelectPublisher(Stream.of( + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("foo")) + .build(), + SelectObjectContentEventStream.recordsBuilder() + .payload(SdkBytes.fromUtf8String("bar")) + .build()) + .map(e -> { throw SdkException.create("error!", null); })); + + try (AbortableInputStream inputStream = + selectEventStreamPublisher.toRecordsInputStream(e -> {})) { + assertThrows(SdkException.class, () -> readAll(inputStream)); + } + } + + private SelectEventStreamPublisher createSelectPublisher( + Stream<SelectObjectContentEventStream> stream) { + SdkPublisher<SelectObjectContentEventStream> sdkPublisher = + new StreamPublisher<>(stream, createExecutor()); + CompletableFuture<Void> future = + CompletableFuture.completedFuture(null); + SelectObjectContentResponse response = + SelectObjectContentResponse.builder().build(); + return new SelectEventStreamPublisher(future, response, sdkPublisher); + } + + private static String readAll(InputStream inputStream) throws IOException { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + byte[] buffer = new byte[8096]; + int read; + while ((read = inputStream.read(buffer, 0, buffer.length)) != -1) { + outputStream.write(buffer, 0, read); + } + return outputStream.toString(); + } + } +} From 644a32f9d276d4ae7c7374c245465d1c47602b4f Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Fri, 9 Dec 2022 15:06:19 +0000 Subject: [PATCH 08/13] updates new providers in TestS3AAWSCredentialsProvider to V2 --- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 7c3d6d8548bf0..957db1a038f4b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -34,6 +34,7 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; @@ -482,17 +483,8 @@ public void refresh() { } } - private static final AWSCredentials EXPECTED_CREDENTIALS = new AWSCredentials() { - @Override - public String getAWSAccessKeyId() { - return "expectedAccessKey"; - } - - @Override - public String getAWSSecretKey() { - return "expectedSecret"; - } - }; + private static final AwsCredentials EXPECTED_CREDENTIALS = + AwsBasicCredentials.create("expectedAccessKey", "expectedSecret"); /** * Credential
provider that takes a long time. @@ -504,7 +496,7 @@ public SlowProvider(@Nullable URI uri, Configuration conf) { } @Override - protected AWSCredentials createCredentials(Configuration config) throws IOException { + protected AwsCredentials createCredentials(Configuration config) throws IOException { // yield to other callers to induce race condition Thread.yield(); return EXPECTED_CREDENTIALS; @@ -578,7 +570,7 @@ public ErrorProvider(@Nullable URI uri, Configuration conf) { } @Override - protected AWSCredentials createCredentials(Configuration config) throws IOException { + protected AwsCredentials createCredentials(Configuration config) throws IOException { throw new IOException("expected error"); } } From ce02671de4ab354ec019fa8c335bad55e46ac906 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Wed, 18 Jan 2023 13:21:17 +0000 Subject: [PATCH 09/13] update GET range referrer header logic to V2 --- .../fs/s3a/audit/impl/LoggingAuditor.java | 28 ++++++------- .../fs/s3a/audit/AbstractAuditingTest.java | 41 +++++++++++++++---- .../audit/TestHttpReferrerAuditHeader.java | 26 +++++++----- 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java index 0dd0976acc511..4005179d201bd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Map; +import software.amazon.awssdk.awscore.AwsExecutionAttribute; import software.amazon.awssdk.core.SdkRequest; import software.amazon.awssdk.core.interceptor.Context; import software.amazon.awssdk.core.interceptor.ExecutionAttributes; @@ -261,21 +262,18 @@ private class LoggingAuditSpan extends AbstractAuditSpanImpl { * Attach Range of data for GetObject Request. * @param request given get object request */ -// private void attachRangeFromRequest(AmazonWebServiceRequest request) { -// if (request instanceof GetObjectRequest) { -// long[] rangeValue = ((GetObjectRequest) request).getRange(); -// if (rangeValue == null || rangeValue.length == 0) { -// return; -// } -// if (rangeValue.length != 2) { -// WARN_INCORRECT_RANGE.warn("Expected range to contain 0 or 2 elements." -// + " Got {} elements. 
Ignoring.", rangeValue.length); -// return; -// } -// String combinedRangeValue = String.format("%d-%d", rangeValue[0], rangeValue[1]); -// referrer.set(AuditConstants.PARAM_RANGE, combinedRangeValue); -// } -// } + private void attachRangeFromRequest(SdkHttpRequest request, + ExecutionAttributes executionAttributes) { + + if (executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME).equals("GetObject")) { + if (request.headers() != null + && request.headers().get("Range") != null) { + String rangeValue = request.headers().get("Range").get(0); + String rangeHeader = rangeValue.split("=")[1]; + referrer.set(AuditConstants.PARAM_RANGE, rangeHeader); + } + } + } private final String description; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java index 3f3b3149d426e..0f6421d1bc4e2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java @@ -22,10 +22,14 @@ import java.util.Arrays; import java.util.List; import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Map; import java.util.function.Consumer; import java.util.stream.Collectors; + +import software.amazon.awssdk.awscore.AwsExecutionAttribute; import software.amazon.awssdk.core.interceptor.ExecutionAttributes; import software.amazon.awssdk.core.interceptor.InterceptorContext; import software.amazon.awssdk.http.SdkHttpMethod; @@ -162,14 +166,37 @@ protected SdkHttpRequest head() { } /** - * Create a GetObject request and modify it before passing it through auditor. - * @param modifyRequest Consumer Interface for changing the request before passing to the auditor - * @return the request + * Create a get request and pass it through the manager's beforeExecution() + * callback. + * + * @return a processed request. 
*/ - protected GetObjectRequest get(Consumer modifyRequest) { - GetObjectRequest req = requestFactory.newGetObjectRequest("/"); - modifyRequest.accept(req); - return manager.beforeExecution(req); + protected SdkHttpRequest get(String range) { + GetObjectRequest.Builder getObjectRequestBuilder = + requestFactory.newGetObjectRequestBuilder("/"); + + SdkHttpRequest.Builder httpRequestBuilder = + SdkHttpRequest.builder().uri(URI.create("https://test")).method(SdkHttpMethod.GET); + + if (!range.isEmpty()) { + getObjectRequestBuilder.range(range); + List rangeHeader = new ArrayList<>(); + rangeHeader.add(range); + Map> headers = new HashMap<>(); + headers.put("Range", rangeHeader); + httpRequestBuilder.headers(headers); + } + + manager.requestCreated(getObjectRequestBuilder); + GetObjectRequest getObjectRequest = getObjectRequestBuilder.build(); + ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build().putAttribute( + AwsExecutionAttribute.OPERATION_NAME, "GetObject"); + InterceptorContext context = InterceptorContext.builder() + .request(getObjectRequest) + .httpRequest(httpRequestBuilder.build()) + .build(); + manager.beforeExecution(context, executionAttributes); + return manager.modifyHttpRequest(context, executionAttributes); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java index 991379e435d91..430e1370c0397 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java @@ -308,13 +308,16 @@ public void testStripWrappedQuotes() throws Throwable { @Test public void testGetObjectRange() throws Throwable { AuditSpan span = span(); - GetObjectRequest request = get(getObjectRequest -> getObjectRequest.setRange(100, 200)); - Map headers - = request.getCustomRequestHeaders(); + SdkHttpRequest request = get("bytes=100-200"); + Map> headers = request.headers(); assertThat(headers) - .describedAs("Custom headers") - .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + .describedAs("Custom headers") + .containsKey(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); @@ -327,13 +330,16 @@ public void testGetObjectRange() throws Throwable { @Test public void testGetObjectWithoutRange() throws Throwable { AuditSpan span = span(); - GetObjectRequest request = get(getObjectRequest -> {}); - Map headers - = request.getCustomRequestHeaders(); + SdkHttpRequest request = get(""); + Map> headers = request.headers(); assertThat(headers) .describedAs("Custom headers") .containsKey(HEADER_REFERRER); - String header = headers.get(HEADER_REFERRER); + List headerValues = headers.get(HEADER_REFERRER); + assertThat(headerValues) + .describedAs("Multiple referrer headers") + .hasSize(1); + String header = headerValues.get(0); LOG.info("Header is {}", header); Map params = HttpReferrerAuditHeader.extractQueryParameters(header); From fb6d12a9a59520e35049783851b5b2f626952d30 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Wed, 18 Jan 2023 15:55:26 +0000 Subject: [PATCH 10/13] adds in unit check for 
bytes --- .../hadoop/fs/s3a/audit/impl/LoggingAuditor.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java index 4005179d201bd..b48519907b2f9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java @@ -265,12 +265,14 @@ private class LoggingAuditSpan extends AbstractAuditSpanImpl { private void attachRangeFromRequest(SdkHttpRequest request, ExecutionAttributes executionAttributes) { - if (executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME).equals("GetObject")) { - if (request.headers() != null - && request.headers().get("Range") != null) { - String rangeValue = request.headers().get("Range").get(0); - String rangeHeader = rangeValue.split("=")[1]; - referrer.set(AuditConstants.PARAM_RANGE, rangeHeader); + if (executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME) + .equals("GetObject")) { + if (request.headers() != null && request.headers().get("Range") != null) { + String[] rangeHeader = request.headers().get("Range").get(0).split("="); + // only set header if range unit is bytes + if (rangeHeader[0].equals("bytes")) { + referrer.set(AuditConstants.PARAM_RANGE, rangeHeader[1]); + } } } } From 1f137d3283c0402c1d2dfd4729df362ebe1a45d9 Mon Sep 17 00:00:00 2001 From: ahmarsuhail Date: Mon, 24 Apr 2023 16:40:17 +0100 Subject: [PATCH 11/13] HADOOP-18565. Complete outstanding items for the AWS SDK V2 upgrade. (#5421) Changes include * use bundled transfer manager * adds transfer listener to upload * adds support for custom signers * don't set default endpoint * removes v1 sdk bundle, only use core package * implements region caching + many more Note: spotbugs is warning about inconsistent synchronization in accessing a new s3a FS field. This will be fixed in a follow-up patch. 
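For reviewers, a minimal sketch of how upload progress is surfaced once the
bundled transfer manager and the transfer listener are in place. This is not
part of the patch: the bucket, key and file path are illustrative, and the
types come from software.amazon.awssdk.transfer.s3 and
software.amazon.awssdk.transfer.s3.progress:

    S3TransferManager tm = S3TransferManager.builder()
        .s3Client(s3AsyncClient)  // the async client created by the factory
        .build();

    FileUpload upload = tm.uploadFile(UploadFileRequest.builder()
        .putObjectRequest(b -> b.bucket("example-bucket").key("example-key"))
        .source(Paths.get("/tmp/example.dat"))
        // replaces the v1 request-level ProgressListener
        .addTransferListener(new TransferListener() {
          @Override
          public void bytesTransferred(Context.BytesTransferred context) {
            // v2 renames the snapshot accessor to transferredBytes()
            long transferred = context.progressSnapshot().transferredBytes();
          }
        })
        .build());

    upload.completionFuture().join();

The real listener wired up by this patch is ProgressableProgressListener
(below), which maps these callbacks back onto a Hadoop Progressable and the
filesystem's put-progress statistics.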
Contributed by Ahmar Suhail --- .../fs/statistics/StoreStatisticNames.java | 4 + hadoop-project/pom.xml | 23 +- hadoop-tools/hadoop-aws/pom.xml | 6 +- .../org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 427 +------------ .../fs/s3a/InconsistentS3ClientFactory.java | 3 +- .../fs/s3a/ProgressableProgressListener.java | 6 +- .../hadoop/fs/s3a/S3ABlockOutputStream.java | 61 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 251 +++++--- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 570 ++---------------- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 73 +-- .../org/apache/hadoop/fs/s3a/Statistic.java | 5 + .../org/apache/hadoop/fs/s3a/UploadInfo.java | 2 +- .../hadoop/fs/s3a/WriteOperationHelper.java | 1 + .../hadoop/fs/s3a/api/RequestFactory.java | 13 +- .../fs/s3a/audit/impl/LoggingAuditor.java | 5 +- .../auth/AssumedRoleCredentialProvider.java | 2 +- .../s3a/auth/AwsCredentialListProvider.java | 283 +++++++++ .../hadoop/fs/s3a/auth/STSClientFactory.java | 8 +- .../hadoop/fs/s3a/auth/SignerFactory.java | 114 ++++ .../hadoop/fs/s3a/auth/SignerManager.java | 8 +- .../auth/delegation/S3ADelegationTokens.java | 6 +- .../auth/delegation/SessionTokenBinding.java | 4 +- .../hadoop/fs/s3a/impl/AWSCannedACL.java | 15 +- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 82 ++- .../hadoop/fs/s3a/impl/ChangeTracker.java | 19 +- .../impl/ConfigureShadedAWSSocketFactory.java | 13 +- .../hadoop/fs/s3a/impl/HeaderProcessing.java | 36 +- .../hadoop/fs/s3a/impl/NetworkBinding.java | 11 +- .../hadoop/fs/s3a/impl/ProgressListener.java | 26 + .../fs/s3a/impl/ProgressListenerEvent.java | 29 + .../fs/s3a/impl/RequestFactoryImpl.java | 35 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 4 +- .../fs/s3a/ITestS3ABucketExistence.java | 33 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 6 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 120 ++-- .../hadoop/fs/s3a/ITestS3AEncryptionSSEC.java | 2 +- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 209 +++---- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 2 +- .../hadoop/fs/s3a/MockS3ClientFactory.java | 43 +- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 30 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 2 +- .../fs/s3a/TestS3AExceptionTranslation.java | 35 +- .../apache/hadoop/fs/s3a/TestS3AProxy.java | 18 +- .../fs/s3a/TestWildflyAndOpenSSLBinding.java | 9 +- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 2 +- .../hadoop/fs/s3a/auth/ITestCustomSigner.java | 45 +- .../hadoop/fs/s3a/auth/TestSignerManager.java | 41 +- .../ITestSessionDelegationInFilesystem.java | 20 +- .../ITestS3AFileContextStatistics.java | 3 +- .../hadoop/fs/s3a/impl/ITestXAttrCost.java | 3 - .../fs/s3a/impl/TestHeaderProcessing.java | 6 + .../fs/s3a/impl/TestNetworkBinding.java | 43 -- .../fs/s3a/impl/TestRequestFactory.java | 12 +- .../ITestDirectoryMarkerListing.java | 2 +- .../s3a/scale/AbstractSTestS3AHugeFiles.java | 25 +- .../scale/ITestS3ADirectoryPerformance.java | 2 +- .../fs/s3a/select/ITestS3SelectLandsat.java | 2 +- .../hadoop/fs/s3a/tools/ITestMarkerTool.java | 7 - .../org.mockito.plugins.MockMaker | 13 - 60 files changed, 1305 insertions(+), 1577 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java create mode 100644 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java index c04c1bb47fcea..3a8927aba493e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java @@ -407,6 +407,10 @@ public final class StoreStatisticNames { public static final String MULTIPART_UPLOAD_LIST = "multipart_upload_list"; + /** Probe for store region: {@value}. */ + public static final String STORE_REGION_PROBE + = "store_region_probe"; + private StoreStatisticNames() { } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 9ff558e23773a..cf7852bad7d70 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -185,7 +185,8 @@ 900 1.12.316 2.7.1 - 2.18.19 + 2.19.12 + 0.21.0 1.11.2 2.1 0.7 @@ -1130,14 +1131,8 @@ com.amazonaws - aws-java-sdk-bundle + aws-java-sdk-core ${aws-java-sdk.version} - - - io.netty - * - - software.amazon.awssdk @@ -1151,15 +1146,9 @@ - software.amazon.awssdk - s3-transfer-manager - ${aws-java-sdk-v2.version}-PREVIEW - - - io.netty - * - - + software.amazon.awssdk.crt + aws-crt + ${awscrt.version} org.apache.mina diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 89fa02e4c191c..0731df4daf089 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -496,7 +496,7 @@ com.amazonaws - aws-java-sdk-bundle + aws-java-sdk-core compile @@ -505,8 +505,8 @@ compile - software.amazon.awssdk - s3-transfer-manager + software.amazon.awssdk.crt + aws-crt compile diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 6f4ef6c178e72..29ec345c2f9cb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -585,7 +585,7 @@ private Constants() { public static final String SIGNING_ALGORITHM_STS = "fs.s3a." 
+ Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + "signing-algorithm"; + + ".signing-algorithm"; public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 098ac9e026959..1b2c129a6428a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -21,35 +21,11 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.util.List; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.RegionUtils; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Builder; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.AmazonS3EncryptionClientV2Builder; -import com.amazonaws.services.s3.AmazonS3EncryptionV2; -import com.amazonaws.services.s3.S3ClientOptions; -import com.amazonaws.services.s3.internal.ServiceUtils; -import com.amazonaws.services.s3.model.CryptoConfigurationV2; -import com.amazonaws.services.s3.model.CryptoMode; -import com.amazonaws.services.s3.model.CryptoRangeGetMode; -import com.amazonaws.services.s3.model.EncryptionMaterialsProvider; -import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider; -import com.amazonaws.util.AwsHostNameUtils; -import com.amazonaws.util.RuntimeHttpUtils; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; -import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; @@ -61,9 +37,6 @@ import software.amazon.awssdk.services.s3.S3BaseClientBuilder; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; -import software.amazon.awssdk.services.s3.model.HeadBucketRequest; -import software.amazon.awssdk.services.s3.model.HeadBucketResponse; -import software.amazon.awssdk.services.s3.model.S3Exception; import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.commons.lang3.StringUtils; @@ -75,18 +48,10 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; -import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; -import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; -import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; -import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; -import static 
org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; -import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; -import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; + /** * The default {@link S3ClientFactory} implementation. @@ -98,8 +63,6 @@ public class DefaultS3ClientFactory extends Configured implements S3ClientFactory { - private static final String S3_SERVICE_NAME = "s3"; - private static final String REQUESTER_PAYS_HEADER_VALUE = "requester"; /** @@ -108,85 +71,12 @@ public class DefaultS3ClientFactory extends Configured protected static final Logger LOG = LoggerFactory.getLogger(DefaultS3ClientFactory.class); - /** - * A one-off warning of default region chains in use. - */ - private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = - new LogExactlyOnce(LOG); - - /** - * Warning message printed when the SDK Region chain is in use. - */ - private static final String SDK_REGION_CHAIN_IN_USE = - "S3A filesystem client is using" - + " the SDK region resolution chain."; /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); - /** - * Create the client by preparing the AwsConf configuration - * and then invoking {@code buildAmazonS3Client()}. - */ - // TODO: Remove this and all code that configures the V1 S3 client. - @Override - public AmazonS3 createS3Client( - final URI uri, - final S3ClientCreationParameters parameters) throws IOException { - Configuration conf = getConf(); - String bucket = uri.getHost(); - final ClientConfiguration awsConf = S3AUtils - .createAwsConf(conf, - bucket, - Constants.AWS_SERVICE_IDENTIFIER_S3); - // add any headers - parameters.getHeaders().forEach((h, v) -> - awsConf.addHeader(h, v)); - - if (parameters.isRequesterPays()) { - // All calls must acknowledge requester will pay via header. - awsConf.addHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE); - } - - // When EXPERIMENTAL_AWS_INTERNAL_THROTTLING is false - // throttling is explicitly disabled on the S3 client so that - // all failures are collected in S3A instrumentation, and its - // retry policy is the only one used. - // This may cause problems in copy/rename. - awsConf.setUseThrottleRetries( - conf.getBoolean(EXPERIMENTAL_AWS_INTERNAL_THROTTLING, - EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT)); - - if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) { - awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix()); - } - - // Get the encryption method for this bucket. - S3AEncryptionMethods encryptionMethods = - getEncryptionAlgorithm(bucket, conf); - try { - // If CSE is enabled then build a S3EncryptionClient. - if (S3AEncryptionMethods.CSE_KMS.getMethod() - .equals(encryptionMethods.getMethod())) { - return buildAmazonS3EncryptionClient( - awsConf, - bucket, - parameters); - } else { - return buildAmazonS3Client( - awsConf, - parameters); - } - } catch (SdkClientException e) { - // SDK refused to build. - // TODO: remove? 
- //throw translateException("creating AWS S3 client", uri.toString(), e); - throw new IOException("creating AWS S3 client: "+ uri.toString(), e); - } - } - @Override - public S3Client createS3ClientV2( + public S3Client createS3Client( final URI uri, final S3ClientCreationParameters parameters) throws IOException { @@ -217,22 +107,10 @@ public S3AsyncClient createS3AsyncClient( } @Override - public S3TransferManager createS3TransferManager( - final URI uri, - final S3ClientCreationParameters parameters) - throws IOException { - Configuration conf = getConf(); - String bucket = uri.getHost(); - Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket, - parameters.getCredentialSet()); + public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) { + return S3TransferManager.builder() - .s3ClientConfiguration(clientConfiguration -> - clientConfiguration - .minimumPartSizeInBytes(parameters.getMinimumPartSize()) - .credentialsProvider(parameters.getCredentialSet()) - .region(region)) - .transferConfiguration(transferConfiguration -> - transferConfiguration.executor(parameters.getTransferManagerExecutor())) + .s3Client(s3AsyncClient) .build(); } @@ -248,24 +126,26 @@ public S3TransferManager createS3TransferManager( * @param S3 client type */ private , ClientT> BuilderT configureClientBuilder( - BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket) { + BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket) + throws IOException { + + Region region = parameters.getRegion(); + LOG.debug("Using region {}", region); URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); - Region region = getS3Region(conf.getTrimmed(AWS_REGION), bucket, parameters.getCredentialSet()); - LOG.debug("Using endpoint {}; and region {}", endpoint, region); - // TODO: Some configuration done in configureBasicParams is not done yet. + if (endpoint != null) { + builder.endpointOverride(endpoint); + LOG.debug("Using endpoint {}", endpoint); + } + S3Configuration serviceConfiguration = S3Configuration.builder() .pathStyleAccessEnabled(parameters.isPathStyleAccess()) - // TODO: Review. Currently required to pass access point tests in ITestS3ABucketExistence, - // but resolving the region from the ap may be the correct solution. - .useArnRegionEnabled(true) .build(); return builder .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) .credentialsProvider(parameters.getCredentialSet()) - .endpointOverride(endpoint) .region(region) .serviceConfiguration(serviceConfiguration); } @@ -274,12 +154,13 @@ private , ClientT> Build * Create an override configuration for an S3 client. 
* @param parameters parameter object * @param conf configuration object + * @throws IOException any IOE raised, or translated exception * @return the override configuration */ protected ClientOverrideConfiguration createClientOverrideConfiguration( - S3ClientCreationParameters parameters, Configuration conf) { + S3ClientCreationParameters parameters, Configuration conf) throws IOException { final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = - AWSClientConfig.createClientConfigBuilder(conf); + AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3); // add any headers parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); @@ -311,231 +192,6 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( return clientOverrideConfigBuilder.build(); } - /** - * Create an {@link AmazonS3} client of type - * {@link AmazonS3EncryptionV2} if CSE is enabled. - * - * @param awsConf AWS configuration. - * @param bucket bucket name. - * @param parameters parameters. - * @return new AmazonS3 client. - * @throws IOException if lookupPassword() has any problem. - */ - protected AmazonS3 buildAmazonS3EncryptionClient( - final ClientConfiguration awsConf, - final String bucket, - final S3ClientCreationParameters parameters) throws IOException { - - AmazonS3 client; - AmazonS3EncryptionClientV2Builder builder = - new AmazonS3EncryptionClientV2Builder(); - Configuration conf = getConf(); - - // CSE-KMS Method - String kmsKeyId = getS3EncryptionKey(bucket, conf, true); - // Check if kmsKeyID is not null - Preconditions.checkArgument(!StringUtils.isBlank(kmsKeyId), "CSE-KMS " - + "method requires KMS key ID. Use " + S3_ENCRYPTION_KEY - + " property to set it. "); - - EncryptionMaterialsProvider materialsProvider = - new KMSEncryptionMaterialsProvider(kmsKeyId); - builder.withEncryptionMaterialsProvider(materialsProvider); - //Configure basic params of a S3 builder. - configureBasicParams(builder, awsConf, parameters); - - // Configuring endpoint. - AmazonS3EncryptionClientV2Builder.EndpointConfiguration epr - = createEndpointConfiguration(parameters.getEndpoint(), - awsConf, getConf().getTrimmed(AWS_REGION)); - configureEndpoint(builder, epr); - - // Create cryptoConfig. - CryptoConfigurationV2 cryptoConfigurationV2 = - new CryptoConfigurationV2(CryptoMode.AuthenticatedEncryption) - .withRangeGetMode(CryptoRangeGetMode.ALL); - if (epr != null) { - cryptoConfigurationV2 - .withAwsKmsRegion(RegionUtils.getRegion(epr.getSigningRegion())); - LOG.debug("KMS region used: {}", cryptoConfigurationV2.getAwsKmsRegion()); - } - builder.withCryptoConfiguration(cryptoConfigurationV2); - client = builder.build(); - IGNORE_CSE_WARN.info("S3 client-side encryption enabled: Ignore S3-CSE " - + "Warnings."); - - return client; - } - - /** - * Use the Builder API to create an AWS S3 client. - *
<p>
    - * This has a more complex endpoint configuration mechanism - * which initially caused problems; the - * {@code withForceGlobalBucketAccessEnabled(true)} - * command is critical here. - * @param awsConf AWS configuration - * @param parameters parameters - * @return new AmazonS3 client - * @throws SdkClientException if the configuration is invalid. - */ - protected AmazonS3 buildAmazonS3Client( - final ClientConfiguration awsConf, - final S3ClientCreationParameters parameters) { - AmazonS3ClientBuilder b = AmazonS3Client.builder(); - configureBasicParams(b, awsConf, parameters); - - // endpoint set up is a PITA - AwsClientBuilder.EndpointConfiguration epr - = createEndpointConfiguration(parameters.getEndpoint(), - awsConf, getConf().getTrimmed(AWS_REGION)); - configureEndpoint(b, epr); - final AmazonS3 client = b.build(); - return client; - } - - /** - * A method to configure basic AmazonS3Builder parameters. - * - * @param builder Instance of AmazonS3Builder used. - * @param awsConf ClientConfiguration used. - * @param parameters Parameters used to set in the builder. - */ - private void configureBasicParams(AmazonS3Builder builder, - ClientConfiguration awsConf, S3ClientCreationParameters parameters) { - // TODO: This whole block will be removed when we remove the V1 client. - // builder.withCredentials(parameters.getCredentialSet()); - builder.withClientConfiguration(awsConf); - builder.withPathStyleAccessEnabled(parameters.isPathStyleAccess()); - - if (parameters.getMonitoringListener() != null) { - builder.withMonitoringListener(parameters.getMonitoringListener()); - } - - } - - /** - * A method to configure endpoint and Region for an AmazonS3Builder. - * - * @param builder Instance of AmazonS3Builder used. - * @param epr EndpointConfiguration used to set in builder. - */ - private void configureEndpoint( - AmazonS3Builder builder, - AmazonS3Builder.EndpointConfiguration epr) { - if (epr != null) { - // an endpoint binding was constructed: use it. - builder.withEndpointConfiguration(epr); - } else { - // no idea what the endpoint is, so tell the SDK - // to work it out at the cost of an extra HEAD request - builder.withForceGlobalBucketAccessEnabled(true); - // HADOOP-17771 force set the region so the build process doesn't halt. - String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION); - LOG.debug("fs.s3a.endpoint.region=\"{}\"", region); - if (!region.isEmpty()) { - // there's either an explicit region or we have fallen back - // to the central one. - LOG.debug("Using default endpoint; setting region to {}", region); - builder.setRegion(region); - } else { - // no region. - // allow this if people really want it; it is OK to rely on this - // when deployed in EC2. - WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE); - LOG.debug(SDK_REGION_CHAIN_IN_USE); - } - } - } - - /** - * Configure classic S3 client. - *
<p>
    - * This includes: endpoint, Path Access and possibly other - * options. - * - * @param s3 S3 Client. - * @param endPoint s3 endpoint, may be empty - * @param pathStyleAccess enable path style access? - * @return S3 client - * @throws IllegalArgumentException if misconfigured - */ - protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, - final String endPoint, - final boolean pathStyleAccess) - throws IllegalArgumentException { - if (!endPoint.isEmpty()) { - try { - s3.setEndpoint(endPoint); - } catch (IllegalArgumentException e) { - String msg = "Incorrect endpoint: " + e.getMessage(); - LOG.error(msg); - throw new IllegalArgumentException(msg, e); - } - } - if (pathStyleAccess) { - LOG.debug("Enabling path style access!"); - s3.setS3ClientOptions(S3ClientOptions.builder() - .setPathStyleAccess(true) - .build()); - } - return s3; - } - - /** - * Given an endpoint string, return an endpoint config, or null, if none - * is needed. - *
<p>
    - * This is a pretty painful piece of code. It is trying to replicate - * what AwsClient.setEndpoint() does, because you can't - * call that setter on an AwsClient constructed via - * the builder, and you can't pass a metrics collector - * down except through the builder. - *
<p>
    - * Note also that AWS signing is a mystery which nobody fully - * understands, especially given all problems surface in a - * "400 bad request" response, which, like all security systems, - * provides minimal diagnostics out of fear of leaking - * secrets. - * - * @param endpoint possibly null endpoint. - * @param awsConf config to build the URI from. - * @param awsRegion AWS S3 Region if the corresponding config is set. - * @return a configuration for the S3 client builder. - */ - @VisibleForTesting - public static AwsClientBuilder.EndpointConfiguration - createEndpointConfiguration( - final String endpoint, final ClientConfiguration awsConf, - String awsRegion) { - LOG.debug("Creating endpoint configuration for \"{}\"", endpoint); - if (endpoint == null || endpoint.isEmpty()) { - // the default endpoint...we should be using null at this point. - LOG.debug("Using default endpoint -no need to generate a configuration"); - return null; - } - - final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf); - LOG.debug("Endpoint URI = {}", epr); - String region = awsRegion; - if (StringUtils.isBlank(region)) { - if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) { - LOG.debug("Endpoint {} is not the default; parsing", epr); - region = AwsHostNameUtils.parseRegion( - epr.getHost(), - S3_SERVICE_NAME); - } else { - // US-east, set region == null. - LOG.debug("Endpoint {} is the standard one; declare region as null", - epr); - region = null; - } - } - LOG.debug("Region for endpoint {}, URI {} is determined as {}", - endpoint, epr, region); - return new AwsClientBuilder.EndpointConfiguration(endpoint, region); - } - /** * Given a endpoint string, create the endpoint URI. * @@ -550,8 +206,8 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { String protocol = secureConnections ? "https" : "http"; if (endpoint == null || endpoint.isEmpty()) { - // the default endpoint - endpoint = CENTRAL_ENDPOINT; + // don't set an endpoint if none is configured, instead let the SDK figure it out. + return null; } if (!endpoint.contains("://")) { @@ -564,45 +220,4 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { throw new IllegalArgumentException(e); } } - - /** - * Get the bucket region. - * - * @param region AWS S3 Region set in the config. This property may not be set, in which case - * ask S3 for the region. - * @param bucket Bucket name. - * @param credentialsProvider Credentials provider to be used with the default s3 client. - * @return region of the bucket. - */ - private static Region getS3Region(String region, String bucket, - AwsCredentialsProvider credentialsProvider) { - - if (!StringUtils.isBlank(region)) { - return Region.of(region); - } - - try { - // build a s3 client with region eu-west-1 that can be used to get the region of the bucket. - // Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because - // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com to - // the actual region the bucket is in. As the request is signed with us-east-1 and not the - // bucket's region, it fails. 
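// A condensed sketch of the probe the comment above describes; the production
// version, with caching and invoker retries, lands in
// S3AFileSystem.getS3Region() later in this patch. It is not patch content:
// "probe", "credentialsProvider" and "bucket" are illustrative names, and the
// header literal is Constants.BUCKET_REGION_HEADER.
//
//   S3Client probe = S3Client.builder()
//       .region(Region.EU_WEST_1)   // signing with us-east-1 would fail, as noted
//       .credentialsProvider(credentialsProvider)
//       .build();
//   try {
//     HeadBucketResponse response = probe.headBucket(b -> b.bucket(bucket));
//     return Region.of(response.sdkHttpResponse()
//         .headers().get("x-amz-bucket-region").get(0));
//   } catch (S3Exception e) {
//     // SC_301_MOVED_PERMANENTLY: the redirect still carries the bucket
//     // region in its response headers
//     if (e.statusCode() == 301) {
//       return Region.of(e.awsErrorDetails().sdkHttpResponse()
//           .headers().get("x-amz-bucket-region").get(0));
//     }
//     throw e;
//   }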
- S3Client s3Client = S3Client.builder().region(Region.EU_WEST_1) - .credentialsProvider(credentialsProvider) - .build(); - - HeadBucketResponse headBucketResponse = - s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); - return Region.of( - headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); - } catch (S3Exception exception) { - if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { - List bucketRegion = - exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER); - return Region.of(bucketRegion.get(0)); - } - } - - return Region.US_EAST_1; - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index e9946e7e85c34..d519c1c0763d8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; import software.amazon.awssdk.awscore.exception.AwsServiceException; @@ -45,7 +46,7 @@ public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { @Override protected ClientOverrideConfiguration createClientOverrideConfiguration( - S3ClientCreationParameters parameters, Configuration conf) { + S3ClientCreationParameters parameters, Configuration conf) throws IOException { LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **"); LOG.warn("List inconsistency is no longer emulated; only throttling and read errors"); return super.createClientOverrideConfiguration(parameters, conf) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java index 1c0fd76c6b107..b614b379bd6b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import software.amazon.awssdk.transfer.s3.ObjectTransfer; +import software.amazon.awssdk.transfer.s3.model.ObjectTransfer; import software.amazon.awssdk.transfer.s3.progress.TransferListener; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; @@ -66,7 +66,7 @@ public void bytesTransferred(TransferListener.Context.BytesTransferred context) progress.progress(); } - long transferred = context.progressSnapshot().bytesTransferred(); + long transferred = context.progressSnapshot().transferredBytes(); long delta = transferred - lastBytesTransferred; fs.incrementPutProgressStatistics(key, delta); lastBytesTransferred = transferred; @@ -80,7 +80,7 @@ public void bytesTransferred(TransferListener.Context.BytesTransferred context) public long uploadCompleted(ObjectTransfer upload) { long delta = - upload.progress().snapshot().bytesTransferred() - lastBytesTransferred; + upload.progress().snapshot().transferredBytes() - lastBytesTransferred; if (delta > 0) { LOG.debug("S3A write delta changed after finished: {} bytes", delta); fs.incrementPutProgressStatistics(key, delta); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index 39d7ed72d7c8d..cd64bbbbfe55b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -38,10 +38,9 @@ import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; +import org.apache.hadoop.fs.s3a.impl.ProgressListener; +import org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.IOStatisticsAggregator; import org.apache.hadoop.util.Preconditions; @@ -71,6 +70,7 @@ import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent.*; import static org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext.EMPTY_BLOCK_OUTPUT_STREAM_STATISTICS; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; @@ -194,7 +194,7 @@ class S3ABlockOutputStream extends OutputStream implements this.executorService = MoreExecutors.listeningDecorator( builder.executorService); this.multiPartUpload = null; - final Progressable progress = builder.progress; + Progressable progress = builder.progress; this.progressListener = (progress instanceof ProgressListener) ? (ProgressListener) progress : new ProgressableListener(progress); @@ -610,21 +610,19 @@ private long putObject() throws IOException { builder.putOptions, false); - // TODO: You cannot currently add progress listeners to requests not via the TM. - // There is an open ticket for this with the SDK team. But need to check how important - // this is for us? - // BlockUploadProgress callback = - // new BlockUploadProgress( - // block, progressListener, now()); - // putObjectRequest.setGeneralProgressListener(callback); + BlockUploadProgress progressCallback = + new BlockUploadProgress(block, progressListener, now()); statistics.blockUploadQueued(size); ListenableFuture putObjectResult = executorService.submit(() -> { try { // the putObject call automatically closes the input // stream afterwards. - return writeOperationHelper.putObject(putObjectRequest, builder.putOptions, uploadData, - uploadData.hasFile(), statistics); + PutObjectResponse response = + writeOperationHelper.putObject(putObjectRequest, builder.putOptions, uploadData, + uploadData.hasFile(), statistics); + progressCallback.progressChanged(REQUEST_BYTE_TRANSFER_EVENT); + return response; } finally { cleanupWithLogger(LOG, uploadData, block); } @@ -904,12 +902,8 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, throw e; } - // TODO: You cannot currently add progress listeners to requests not via the TM. 
- // See also putObject - // BlockUploadProgress callback = - // new BlockUploadProgress( - // block, progressListener, now()); - // request.setGeneralProgressListener(callback); + BlockUploadProgress progressCallback = + new BlockUploadProgress(block, progressListener, now()); statistics.blockUploadQueued(block.dataSize()); ListenableFuture partETagFuture = @@ -919,12 +913,18 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, try { LOG.debug("Uploading part {} for id '{}'", currentPartNumber, uploadId); + + progressCallback.progressChanged(TRANSFER_PART_STARTED_EVENT); + UploadPartResponse response = writeOperationHelper .uploadPart(request, requestBody, statistics); LOG.debug("Completed upload of {} to part {}", block, response.eTag()); LOG.debug("Stream statistics of {}", statistics); partsUploaded++; + + progressCallback.progressChanged(TRANSFER_PART_COMPLETED_EVENT); + return CompletedPart.builder() .eTag(response.eTag()) .partNumber(currentPartNumber) @@ -932,6 +932,7 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block, } catch (IOException e) { // save immediately. noteUploadFailure(e); + progressCallback.progressChanged(TRANSFER_PART_FAILED_EVENT); throw e; } finally { // close the stream and block @@ -1027,22 +1028,24 @@ private IOException abort() { } } + /** * The upload progress listener registered for events returned * during the upload of a single block. * It updates statistics and handles the end of the upload. * Transfer failures are logged at WARN. */ - private final class BlockUploadProgress implements ProgressListener { + private final class BlockUploadProgress { + private final S3ADataBlocks.DataBlock block; private final ProgressListener nextListener; private final Instant transferQueueTime; private Instant transferStartTime; + private long size; /** * Track the progress of a single block upload. * @param block block to monitor - * @param nextListener optional next progress listener * @param transferQueueTime time the block was transferred * into the queue */ @@ -1051,20 +1054,17 @@ private BlockUploadProgress(S3ADataBlocks.DataBlock block, Instant transferQueueTime) { this.block = block; this.transferQueueTime = transferQueueTime; + this.size = block.dataSize(); this.nextListener = nextListener; } - @Override - public void progressChanged(ProgressEvent progressEvent) { - ProgressEventType eventType = progressEvent.getEventType(); - long bytesTransferred = progressEvent.getBytesTransferred(); + public void progressChanged(ProgressListenerEvent eventType) { - long size = block.dataSize(); switch (eventType) { case REQUEST_BYTE_TRANSFER_EVENT: // bytes uploaded - statistics.bytesTransferred(bytesTransferred); + statistics.bytesTransferred(size); break; case TRANSFER_PART_STARTED_EVENT: @@ -1079,6 +1079,7 @@ public void progressChanged(ProgressEvent progressEvent) { statistics.blockUploadCompleted( Duration.between(transferStartTime, now()), size); + statistics.bytesTransferred(size); break; case TRANSFER_PART_FAILED_EVENT: @@ -1093,13 +1094,13 @@ public void progressChanged(ProgressEvent progressEvent) { } if (nextListener != null) { - nextListener.progressChanged(progressEvent); + nextListener.progressChanged(eventType, size); } } } /** - * Bridge from AWS {@code ProgressListener} to Hadoop {@link Progressable}. + * Bridge from {@link ProgressListener} to Hadoop {@link Progressable}. 
*/ private static class ProgressableListener implements ProgressListener { private final Progressable progress; @@ -1108,7 +1109,7 @@ private static class ProgressableListener implements ProgressListener { this.progress = progress; } - public void progressChanged(ProgressEvent progressEvent) { + public void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) { if (progress != null) { progress.progress(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index d29a4584103c2..7416aa1fd113d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -34,6 +34,7 @@ import java.util.Collections; import java.util.Date; import java.util.EnumSet; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -58,11 +59,11 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; -import software.amazon.awssdk.services.s3.model.GetBucketAclRequest; import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.GetObjectResponse; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import software.amazon.awssdk.services.s3.model.MultipartUpload; import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; @@ -78,30 +79,30 @@ import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.NoSuchBucketException; -import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.services.s3.model.S3Exception; import software.amazon.awssdk.services.s3.model.S3Object; import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; -import software.amazon.awssdk.transfer.s3.CompletedCopy; -import software.amazon.awssdk.transfer.s3.CompletedFileUpload; -import software.amazon.awssdk.transfer.s3.Copy; -import software.amazon.awssdk.transfer.s3.CopyRequest; -import software.amazon.awssdk.transfer.s3.FileUpload; +import software.amazon.awssdk.transfer.s3.model.CompletedCopy; +import software.amazon.awssdk.transfer.s3.model.CompletedFileUpload; +import software.amazon.awssdk.transfer.s3.model.Copy; import software.amazon.awssdk.transfer.s3.S3TransferManager; +import
software.amazon.awssdk.transfer.s3.model.CopyRequest; +import software.amazon.awssdk.transfer.s3.model.FileUpload; +import software.amazon.awssdk.transfer.s3.model.UploadFileRequest; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -156,6 +157,7 @@ import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.apache.hadoop.fs.statistics.IOStatisticsContext; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.store.audit.AuditEntryPoint; import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; import org.apache.hadoop.fs.store.audit.AuditSpan; @@ -228,6 +230,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.INITIALIZE_SPAN; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.STATEMENT_ALLOW_SSE_KMS_RW; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations; import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable; @@ -239,12 +242,12 @@ import static org.apache.hadoop.fs.s3a.impl.CreateFileBuilder.OPTIONS_CREATE_FILE_OVERWRITE; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_INACCESSIBLE; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_REQUIRED_EXCEPTION; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.ARN_BUCKET_OPTION; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403_FORBIDDEN; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT; @@ -329,10 +332,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private int executorCapacity; private long multiPartThreshold; public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class); + /** Exactly once log to warn about setting the region in config to avoid probe. 
*/ + private static final LogExactlyOnce SET_REGION_WARNING = new LogExactlyOnce(LOG); private static final Logger PROGRESS = LoggerFactory.getLogger("org.apache.hadoop.fs.s3a.S3AFileSystem.Progress"); private LocalDirAllocator directoryAllocator; - private ObjectCannedACL cannedACL; + private String cannedACL; /** * This must never be null; until initialized it just declares that there @@ -445,6 +450,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private String scheme = FS_S3A; + private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>(); + /** Add any deprecated keys. */ @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { @@ -718,7 +725,7 @@ private void setCSEGauge() { * bucket existence check is not done to improve performance of * S3AFileSystem initialization. When set to 1 or 2, bucket existence check * will be performed which is potentially slow. - * If 3 or higher: warn and use the v2 check. + * If 3 or higher: warn and skip check. * Also logging DNS address of the s3 endpoint if the bucket probe value is * greater than 0 else skipping it for increased performance. * @throws UnknownStoreException the bucket is absent @@ -735,18 +742,14 @@ private void doBucketProbing() throws IOException { LOG.debug("skipping check for bucket existence"); break; case 1: - logDnsLookup(getConf()); - verifyBucketExists(); - break; case 2: logDnsLookup(getConf()); - verifyBucketExistsV2(); + verifyBucketExists(); break; default: // we have no idea what this is, assume it is from a later release. - LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2", + LOG.warn("Unknown bucket probe option {}: {}; skipping check for bucket existence", S3A_BUCKET_PROBE, bucketProbe); - verifyBucketExistsV2(); break; } } @@ -844,58 +847,32 @@ protected static S3AStorageStatistics createStorageStatistics( * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ - // TODO: Review: this used to call doesBucketExist in v1, which does not check permissions, - // not even read access. @Retries.RetryTranslated protected void verifyBucketExists() throws UnknownStoreException, IOException { - if (!invoker.retry("doesBucketExist", bucket, true, - trackDurationOfOperation(getDurationTrackerFactory(), STORE_EXISTS_PROBE.getSymbol(), - () -> { + + if (!trackDurationAndSpan( + STORE_EXISTS_PROBE, bucket, null, () -> + invoker.retry("doesBucketExist", bucket, true, () -> { try { + if (BUCKET_REGIONS.containsKey(bucket)) { + return true; + } s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); return true; - } catch (NoSuchBucketException e) { - return false; - } - }))) { - throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist"); - } - } - - /** - * Verify that the bucket exists. This will correctly throw an exception - * when credentials are invalid. - * TODO: Review. May be redundant in v2. - * Retry policy: retrying, translated.
- * @throws UnknownStoreException the bucket is absent - * @throws IOException any other problem talking to S3 - */ - @Retries.RetryTranslated - protected void verifyBucketExistsV2() - throws UnknownStoreException, IOException { - if (!invoker.retry("doesBucketExistV2", bucket, true, - trackDurationOfOperation(getDurationTrackerFactory(), - STORE_EXISTS_PROBE.getSymbol(), - () -> { - // Bug in SDK always returns `true` for AccessPoint ARNs with `doesBucketExistV2()` - // expanding implementation to use ARNs and buckets correctly - try { - s3Client.getBucketAcl(GetBucketAclRequest.builder() - .bucket(bucket) - .build()); } catch (AwsServiceException ex) { int statusCode = ex.statusCode(); if (statusCode == SC_404_NOT_FOUND || - (statusCode == SC_403_FORBIDDEN && - ex.getMessage().contains(AP_INACCESSIBLE))) { + (statusCode == SC_403_FORBIDDEN && accessPoint != null)) { return false; } } return true; }))) { - throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " - + "not exist"); + + throw new UnknownStoreException("s3a://" + bucket + "/", + " Bucket does " + "not exist. " + "Accessing with " + ENDPOINT + " set to " + + getConf().getTrimmed(ENDPOINT, null)); } } @@ -986,6 +963,12 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { ? conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT) : accessPoint.getEndpoint(); + String configuredRegion = accessPoint == null + ? conf.getTrimmed(AWS_REGION) + : accessPoint.getRegion(); + + Region region = getS3Region(configuredRegion); + S3ClientFactory.S3ClientCreationParameters parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(credentials) @@ -997,12 +980,94 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) .withExecutionInterceptors(auditManager.createExecutionInterceptors()) .withMinimumPartSize(partSize) - .withTransferManagerExecutor(unboundedThreadPool); + .withTransferManagerExecutor(unboundedThreadPool) + .withRegion(region); S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); - s3Client = clientFactory.createS3ClientV2(getUri(), parameters); + s3Client = clientFactory.createS3Client(getUri(), parameters); + createS3AsyncClient(clientFactory, parameters); + transferManager = clientFactory.createS3TransferManager(s3AsyncClient); + } + + /** + * Creates and configures the S3AsyncClient. + * Uses synchronized method to suppress spotbugs error. + * + * @param clientFactory factory used to create S3AsyncClient + * @param parameters parameter object + * @throws IOException on any IO problem + */ + private synchronized void createS3AsyncClient(S3ClientFactory clientFactory, + S3ClientFactory.S3ClientCreationParameters parameters) throws IOException { s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters); - transferManager = clientFactory.createS3TransferManager(getUri(), parameters); + } + + /** + * Get the bucket region. + * + * @param region AWS S3 Region set in the config. This property may not be set, in which case + * ask S3 for the region. + * @return region of the bucket. 
+ */ + private Region getS3Region(String region) throws IOException { + + if (!StringUtils.isBlank(region)) { + return Region.of(region); + } + + Region cachedRegion = BUCKET_REGIONS.get(bucket); + + if (cachedRegion != null) { + LOG.debug("Got region {} for bucket {} from cache", cachedRegion, bucket); + return cachedRegion; + } + + Region s3Region = trackDurationAndSpan(STORE_REGION_PROBE, bucket, null, + () -> invoker.retry("getS3Region", bucket, true, () -> { + try { + + SET_REGION_WARNING.warn( + "Getting region for bucket {} from S3, this will slow down FS initialisation. " + + "To avoid this, set the region using property {}", bucket, + FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region"); + + // build a s3 client with region eu-west-1 that can be used to get the region of the + // bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com + // to the actual region the bucket is in. As the request is signed with us-east-1 and + // not the bucket's region, it fails. + S3Client getRegionS3Client = + S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials) + .build(); + + HeadBucketResponse headBucketResponse = + getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + + Region bucketRegion = Region.of( + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + Region bucketRegion = Region.of( + exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER) + .get(0)); + BUCKET_REGIONS.put(bucket, bucketRegion); + + return bucketRegion; + } + + if (exception.statusCode() == SC_404_NOT_FOUND) { + throw new UnknownStoreException("s3a://" + bucket + "/", + " Bucket does " + "not exist"); + } + + throw exception; + } + })); + + return s3Region; } /** @@ -1179,7 +1244,7 @@ public EncryptionSecrets getEncryptionSecrets() { private void initCannedAcls(Configuration conf) { String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL); if (!cannedACLName.isEmpty()) { - cannedACL = ObjectCannedACL.valueOf(AWSCannedACL.valueOf(cannedACLName).toString()); + cannedACL = AWSCannedACL.valueOf(cannedACLName).toString(); } else { cannedACL = null; } @@ -1283,7 +1348,7 @@ public int getDefaultPort() { * @return S3Client */ @VisibleForTesting - public S3Client getAmazonS3V2ClientForTesting(String reason) { + public S3Client getAmazonS3ClientForTesting(String reason) { LOG.warn("Access to S3 client requested, reason {}", reason); return s3Client; } @@ -1424,7 +1489,7 @@ protected void setBucket(String bucket) { * Get the canned ACL of this FS. * @return an ACL, if any */ - ObjectCannedACL getCannedACL() { + String getCannedACL() { return cannedACL; } @@ -2699,6 +2764,26 @@ protected HeadObjectResponse getObjectMetadata(String key, return response; } + /** + * Request bucket metadata. 
+ * @return the metadata + * @throws UnknownStoreException the bucket is absent + * @throws IOException any other problem talking to S3 + */ + @Retries.RetryRaw + protected HeadBucketResponse getBucketMetadata() throws IOException { + final HeadBucketResponse response = trackDurationAndSpan(STORE_EXISTS_PROBE, bucket, null, + () -> invoker.retry("getBucketMetadata()", bucket, true, () -> { + try { + return s3Client.headBucket( + getRequestFactory().newHeadBucketRequestBuilder(bucket).build()); + } catch (NoSuchBucketException e) { + throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist"); + } + })); + return response; + } + /** * Initiate a {@code listObjects} operation, incrementing metrics * in the process. @@ -2976,10 +3061,12 @@ public UploadInfo putObject(PutObjectRequest putObjectRequest, File file, LOG.debug("PUT {} bytes to {} via transfer manager ", len, putObjectRequest.key()); incrementPutStartStatistics(len); - // TODO: Something not right with the TM listener, fix FileUpload upload = transferManager.uploadFile( - UploadFileRequest.builder().putObjectRequest(putObjectRequest).source(file).build()); - // .overrideConfiguration(o -> o.addListener(listener)).build()); + UploadFileRequest.builder() + .putObjectRequest(putObjectRequest) + .source(file) + .addTransferListener(listener) + .build()); return new UploadInfo(upload, len); } @@ -3038,16 +3125,7 @@ PutObjectResponse putObjectDirect(PutObjectRequest putObjectRequest, * @throws IllegalArgumentException if the length is negative */ private long getPutRequestLength(PutObjectRequest putObjectRequest) { - long len; - - // TODO: Check why this exists. Content length is set before. Why can't that be used directly? -// if (putObjectRequest.getFile() != null) { -// len = putObjectRequest.getFile().length(); -// } else { -// len = putObjectRequest.getMetadata().getContentLength(); -// } - - len = putObjectRequest.contentLength(); + long len = putObjectRequest.contentLength(); Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length"); return len; @@ -4261,19 +4339,9 @@ public List listAWSPolicyRules( @Retries.RetryTranslated private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) - throws IOException, InterruptedIOException { + throws IOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); - // TODO: Transfer manager currently only provides transfer listeners for upload, - // add progress listener for copy when this is supported. -// TODO: Is the above still valid? Try to enable when logger issue is resolved. -// TransferListener progressListener = new TransferListener() { -// @Override -// public void transferComplete(Context.TransferComplete context) { -// incrementWriteOperations(); -// } -// }; - ChangeTracker changeTracker = new ChangeTracker( keyToQualifiedPath(srcKey).toString(), changeDetectionPolicy, @@ -4316,10 +4384,6 @@ private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, Copy copy = transferManager.copy( CopyRequest.builder() .copyObjectRequest(copyObjectRequestBuilder.build()) -// TODO: Enable when logger issue is resolved. 
-// .overrideConfiguration(c -> c -// .addListener(getAuditManager().createTransferListener()) -// .addListener(progressListener)) .build()); try { @@ -4834,6 +4898,13 @@ public HeadObjectResponse getObjectMetadata(final String key) return once("getObjectMetadata", key, () -> S3AFileSystem.this.getObjectMetadata(key)); } + + @Override + public HeadBucketResponse getBucketMetadata() + throws IOException { + return once("getBucketMetadata", bucket, () -> + S3AFileSystem.this.getBucketMetadata()); + } } /** * {@inheritDoc}. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index d44b8cc82829c..8dbe3f12a6e2b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -18,11 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.auth.AWSCredentialsProvider; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.exception.AbortedException; import software.amazon.awssdk.core.exception.SdkException; @@ -31,7 +26,6 @@ import software.amazon.awssdk.services.s3.model.S3Object; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -45,14 +39,11 @@ import org.apache.hadoop.fs.s3a.audit.AuditFailureException; import org.apache.hadoop.fs.s3a.audit.AuditIntegration; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; -import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; -import org.apache.hadoop.fs.s3a.impl.NetworkBinding; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.Lists; import org.slf4j.Logger; @@ -73,19 +64,13 @@ import java.net.URI; import java.nio.file.AccessDeniedException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; @@ -107,12 +92,7 @@ public final class S3AUtils { static final String CONSTRUCTOR_EXCEPTION = "constructor exception"; static final String INSTANTIATION_EXCEPTION = "instantiation exception"; - static final String NOT_AWS_PROVIDER = - "does not implement AWSCredentialsProvider"; - static final String NOT_AWS_V2_PROVIDER = - "does not implement AwsCredentialsProvider"; - static final String ABSTRACT_PROVIDER = - "is abstract and therefore cannot be 
created"; + static final String ENDPOINT_KEY = "Endpoint"; /** Filesystem is closed; kept here to keep the errors close. */ @@ -147,14 +127,6 @@ public final class S3AUtils { private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s"; - /** - * Error message when the AWS provider list built up contains a forbidden - * entry. - */ - @VisibleForTesting - public static final String E_FORBIDDEN_AWS_PROVIDER - = "AWS provider class cannot be used"; - private S3AUtils() { } @@ -235,16 +207,11 @@ public static IOException translateException(@Nullable String operation, case SC_301_MOVED_PERMANENTLY: case SC_307_TEMPORARY_REDIRECT: if (s3Exception != null) { - // TODO: Can we get the endpoint in v2? - // Maybe not: https://github.com/aws/aws-sdk-java-v2/issues/3048 -// if (s3Exception.getAdditionalDetails() != null && -// s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) { -// message = String.format("Received permanent redirect response to " -// + "endpoint %s. This likely indicates that the S3 endpoint " -// + "configured in %s does not match the AWS region containing " -// + "the bucket.", -// s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT); -// } + message = String.format("Received permanent redirect response to " + + "region %s. This likely indicates that the S3 region " + + "configured in %s does not match the AWS region containing " + "the bucket.", + s3Exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER), + AWS_REGION); ioe = new AWSRedirectException(message, s3Exception); } else { ioe = new AWSRedirectException(message, ase); @@ -569,135 +536,8 @@ public static long dateToLong(final Date date) { return date.getTime(); } - /** - * The standard AWS provider list for AWS connections. - */ - public static final List> - STANDARD_AWS_PROVIDERS = Collections.unmodifiableList( - Arrays.asList( - TemporaryAWSCredentialsProvider.class, - SimpleAWSCredentialsProvider.class, - EnvironmentVariableCredentialsProvider.class, - IAMInstanceCredentialsProvider.class)); - - /** - * Create the AWS credentials from the providers, the URI and - * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration. - * @param binding Binding URI -may be null - * @param conf filesystem configuration - * @return a credentials provider list - * @throws IOException Problems loading the providers (including reading - * secrets from credential files). - */ - public static AWSCredentialProviderList createAWSCredentialProviderSet( - @Nullable URI binding, - Configuration conf) throws IOException { - // this will reject any user:secret entries in the URI - S3xLoginHelper.rejectSecretsInURIs(binding); - AWSCredentialProviderList credentials = - buildAWSProviderList(binding, - conf, - AWS_CREDENTIALS_PROVIDER, - STANDARD_AWS_PROVIDERS, - new HashSet<>()); - // make sure the logging message strips out any auth details - LOG.debug("For URI {}, using credentials {}", - binding, credentials); - return credentials; - } - - /** - * Load list of AWS credential provider/credential provider factory classes. - * @param conf configuration - * @param key key - * @param defaultValue list of default values - * @return the list of classes, possibly empty - * @throws IOException on a failure to load the list. - */ - public static List> loadAWSProviderClasses(Configuration conf, - String key, - Class... 
defaultValue) throws IOException { - try { - return Arrays.asList(conf.getClasses(key, defaultValue)); - } catch (RuntimeException e) { - Throwable c = e.getCause() != null ? e.getCause() : e; - throw new IOException("From option " + key + ' ' + c, c); - } - } - - /** - * Maps V1 credential providers to either their equivalent SDK V2 class or hadoop provider. - */ - private static Map initCredentialProvidersMap() { - Map v1v2CredentialProviderMap = new HashMap<>(); - - v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider", - EnvironmentVariableCredentialsProvider.class); - v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper", - IAMInstanceCredentialsProvider.class); - v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider", - IAMInstanceCredentialsProvider.class); - - return v1v2CredentialProviderMap; - } - - /** - * Load list of AWS credential provider/credential provider factory classes; - * support a forbidden list to prevent loops, mandate full secrets, etc. - * @param binding Binding URI -may be null - * @param conf configuration - * @param key key - * @param forbidden a possibly empty set of forbidden classes. - * @param defaultValues list of default providers. - * @return the list of classes, possibly empty - * @throws IOException on a failure to load the list. - */ - public static AWSCredentialProviderList buildAWSProviderList( - @Nullable final URI binding, - final Configuration conf, - final String key, - final List> defaultValues, - final Set> forbidden) throws IOException { - - // build up the base provider - List> awsClasses = loadAWSProviderClasses(conf, - key, - defaultValues.toArray(new Class[defaultValues.size()])); - - Map v1v2CredentialProviderMap = initCredentialProvidersMap(); - // and if the list is empty, switch back to the defaults. - // this is to address the issue that configuration.getClasses() - // doesn't return the default if the config value is just whitespace. - if (awsClasses.isEmpty()) { - awsClasses = defaultValues; - } - // iterate through, checking for blacklists and then instantiating - // each provider - AWSCredentialProviderList providers = new AWSCredentialProviderList(); - for (Class aClass : awsClasses) { - - if (forbidden.contains(aClass)) { - throw new IOException(E_FORBIDDEN_AWS_PROVIDER - + " in option " + key + ": " + aClass); - } - - if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) && - aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)){ - providers.add(createAWSV2CredentialProvider(conf, - v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding)); - } else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) { - providers.add(createAWSV1CredentialProvider(conf, - aClass, binding)); - } else { - providers.add(createAWSV2CredentialProvider(conf, aClass, binding)); - } - - } - return providers; - } - - /** - * Create an AWS credential provider from its class by using reflection. The + /*** + * Creates an instance of a class using reflection. The * class must implement one of the following means of construction, which are * attempted in order: * @@ -706,187 +546,79 @@ public static AWSCredentialProviderList buildAWSProviderList( * org.apache.hadoop.conf.Configuration *
   * <ol>
   * <li>a public constructor accepting java.net.URI and
   *     org.apache.hadoop.conf.Configuration</li>
   * <li>a public constructor accepting
   *     org.apache.hadoop.conf.Configuration</li>
-  * <li>a public static method named getInstance that accepts no
+  * <li>a public static method named as per methodName, that accepts no
   *    arguments and returns an instance of
-  *    com.amazonaws.auth.AWSCredentialsProvider, or</li>
+  *    specified type, or</li>
   * <li>a public default constructor.</li>
   * </ol>
   *
+  * @param instanceClass Class for which instance is to be created
   * @param conf configuration
-  * @param credClass credential class
   * @param uri URI of the FS
-  * @return the instantiated class
-  * @throws IOException on any instantiation failure.
+  * @param interfaceImplemented interface that this class implements
+  * @param methodName name of factory method to be invoked
+  * @param configKey config key under which this class is specified
+  * @param <InstanceT> type of the instance to create
+  * @return instance of the specified class
+  * @throws IOException on any problem
   */
-  private static AWSCredentialsProvider createAWSV1CredentialProvider(
-      Configuration conf,
-      Class credClass,
-      @Nullable URI uri) throws IOException {
-    AWSCredentialsProvider credentials = null;
-    String className = credClass.getName();
-    if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
-      throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
-    }
-    if (Modifier.isAbstract(credClass.getModifiers())) {
-      throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
-    }
-    LOG.debug("Credential provider class is {}", className);
+  @SuppressWarnings("unchecked")
+  public static <InstanceT> InstanceT getInstanceFromReflection(Class<?> instanceClass,
+      Configuration conf, @Nullable URI uri, Class<?> interfaceImplemented, String methodName,
+      String configKey) throws IOException {
+
+    String className = instanceClass.getName();
     try {
-      // new X(uri, conf)
-      Constructor cons = getConstructor(credClass, URI.class,
-          Configuration.class);
-      if (cons != null) {
-        credentials = (AWSCredentialsProvider)cons.newInstance(uri, conf);
-        return credentials;
-      }
-      // new X(conf)
-      cons = getConstructor(credClass, Configuration.class);
-      if (cons != null) {
-        credentials = (AWSCredentialsProvider)cons.newInstance(conf);
-        return credentials;
+      Constructor<?> cons = null;
+      if (conf != null) {
+        // new X(uri, conf)
+        cons = getConstructor(instanceClass, URI.class, Configuration.class);
+
+        if (cons != null) {
+          return (InstanceT) cons.newInstance(uri, conf);
+        }
+        // new X(conf)
+        cons = getConstructor(instanceClass, Configuration.class);
+        if (cons != null) {
+          return (InstanceT) cons.newInstance(conf);
+        }
       }
-      // X.getInstance()
-      Method factory = getFactoryMethod(credClass, AWSCredentialsProvider.class,
-          "getInstance");
+      // X.methodName()
+      Method factory = getFactoryMethod(instanceClass, interfaceImplemented, methodName);
       if (factory != null) {
-        credentials = (AWSCredentialsProvider)factory.invoke(null);
-        return credentials;
+        return (InstanceT) factory.invoke(null);
      }
      // new X()
-      cons = getConstructor(credClass);
+      cons = getConstructor(instanceClass);
      if (cons != null) {
-        credentials = (AWSCredentialsProvider)cons.newInstance();
-        return credentials;
+        return (InstanceT) cons.newInstance();
      }
      // no supported constructor or factory method found
      throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION + ". 
A class specified in %s must provide a public constructor " + "of a supported signature, or a public factory method named " - + "getInstance that accepts no arguments.", - className, AWS_CREDENTIALS_PROVIDER)); + + "create that accepts no arguments.", className, configKey)); } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (targetException == null) { - targetException = e; + targetException = e; } if (targetException instanceof IOException) { throw (IOException) targetException; } else if (targetException instanceof SdkException) { - throw translateException("Instantiate " + className, "", - (SdkException) targetException); + throw translateException("Instantiate " + className, "", (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + targetException, + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + targetException, targetException); } } catch (ReflectiveOperationException | IllegalArgumentException e) { // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + e, - e); - } - } - - /** - * Create an AWS credential provider from its class by using reflection. The - * class must implement one of the following means of construction, which are - * attempted in order: - * - *
-   * <ol>
-   * <li>a public constructor accepting java.net.URI and
-   *     org.apache.hadoop.conf.Configuration</li>
-   * <li>a public constructor accepting
-   *     org.apache.hadoop.conf.Configuration</li>
-   * <li>a public static method named getInstance that accepts no
-   *    arguments and returns an instance of
-   *    software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or</li>
-   * <li>a public default constructor.</li>
-   * </ol>
    - * - * @param conf configuration - * @param credClass credential class - * @param uri URI of the FS - * @return the instantiated class - * @throws IOException on any instantiation failure. - */ - private static AwsCredentialsProvider createAWSV2CredentialProvider( - Configuration conf, - Class credClass, - @Nullable URI uri) throws IOException { - AwsCredentialsProvider credentials = null; - String className = credClass.getName(); - if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) { - throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER); - } - if (Modifier.isAbstract(credClass.getModifiers())) { - throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); - } - LOG.debug("Credential provider class is {}", className); - - try { - // new X(uri, conf) - Constructor cons = getConstructor(credClass, URI.class, - Configuration.class); - if (cons != null) { - credentials = (AwsCredentialsProvider)cons.newInstance(uri, conf); - return credentials; - } - // new X(conf) - cons = getConstructor(credClass, Configuration.class); - if (cons != null) { - credentials = (AwsCredentialsProvider)cons.newInstance(conf); - return credentials; - } - - // X.getInstance() - Method factory = getFactoryMethod(credClass, AwsCredentialsProvider.class, - "create"); - if (factory != null) { - credentials = (AwsCredentialsProvider)factory.invoke(null); - return credentials; - } - - // new X() - cons = getConstructor(credClass); - if (cons != null) { - credentials = (AwsCredentialsProvider)cons.newInstance(); - return credentials; - } - - // no supported constructor or factory method found - throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION - + ". A class specified in %s must provide a public constructor " - + "of a supported signature, or a public factory method named " - + "create that accepts no arguments.", - className, AWS_CREDENTIALS_PROVIDER)); - } catch (InvocationTargetException e) { - // TODO: Can probably be moved to a common method, but before doing this, check if we still - // want to extend V2 providers the same way v1 providers are. - Throwable targetException = e.getTargetException(); - if (targetException == null) { - targetException = e; - } - if (targetException instanceof IOException) { - throw (IOException) targetException; - } else if (targetException instanceof SdkException) { - throw translateException("Instantiate " + className, "", - (SdkException) targetException); - } else { - // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + targetException, - targetException); - } - } catch (ReflectiveOperationException | IllegalArgumentException e) { - // supported constructor or factory method found, but the call failed - throw new IOException(className + " " + INSTANTIATION_EXCEPTION - + ": " + e, - e); + throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + e, e); } } @@ -1356,216 +1088,6 @@ public static void deleteWithWarning(FileSystem fs, } } - /** - * Create a new AWS {@code ClientConfiguration}. - * All clients to AWS services MUST use this for consistent setup - * of connectivity, UA, proxy settings. 
- * @param conf The Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @return new AWS client configuration - * @throws IOException problem creating AWS client configuration - * - * @deprecated use {@link #createAwsConf(Configuration, String, String)} - */ - @Deprecated - public static ClientConfiguration createAwsConf(Configuration conf, - String bucket) - throws IOException { - return createAwsConf(conf, bucket, null); - } - - /** - * Create a new AWS {@code ClientConfiguration}. All clients to AWS services - * MUST use this or the equivalents for the specific service for - * consistent setup of connectivity, UA, proxy settings. - * - * @param conf The Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @param awsServiceIdentifier a string representing the AWS service (S3, - * etc) for which the ClientConfiguration is being created. - * @return new AWS client configuration - * @throws IOException problem creating AWS client configuration - */ - public static ClientConfiguration createAwsConf(Configuration conf, - String bucket, String awsServiceIdentifier) - throws IOException { - final ClientConfiguration awsConf = new ClientConfiguration(); - initConnectionSettings(conf, awsConf); - initProxySupport(conf, bucket, awsConf); - initUserAgent(conf, awsConf); - if (StringUtils.isNotEmpty(awsServiceIdentifier)) { - String configKey = null; - switch (awsServiceIdentifier) { - case AWS_SERVICE_IDENTIFIER_S3: - configKey = SIGNING_ALGORITHM_S3; - break; - case AWS_SERVICE_IDENTIFIER_STS: - configKey = SIGNING_ALGORITHM_STS; - break; - default: - // Nothing to do. The original signer override is already setup - } - if (configKey != null) { - String signerOverride = conf.getTrimmed(configKey, ""); - if (!signerOverride.isEmpty()) { - LOG.debug("Signer override for {}} = {}", awsServiceIdentifier, - signerOverride); - awsConf.setSignerOverride(signerOverride); - } - } - } - return awsConf; - } - - /** - * Initializes all AWS SDK settings related to connection management. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - * - * @throws IOException if there was an error initializing the protocol - * settings - */ - public static void initConnectionSettings(Configuration conf, - ClientConfiguration awsConf) throws IOException { - awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS, - DEFAULT_MAXIMUM_CONNECTIONS, 1)); - initProtocolSettings(conf, awsConf); - awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES, - DEFAULT_MAX_ERROR_RETRIES, 0)); - awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT, - DEFAULT_ESTABLISH_TIMEOUT, 0)); - awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT, 0)); - int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER, - DEFAULT_SOCKET_SEND_BUFFER, 2048); - int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER, - DEFAULT_SOCKET_RECV_BUFFER, 2048); - long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, - DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - - if (requestTimeoutMillis > Integer.MAX_VALUE) { - LOG.debug("Request timeout is too high({} ms). 
Setting to {} ms instead", - requestTimeoutMillis, Integer.MAX_VALUE); - requestTimeoutMillis = Integer.MAX_VALUE; - } - awsConf.setRequestTimeout((int) requestTimeoutMillis); - awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer); - String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); - if (!signerOverride.isEmpty()) { - LOG.debug("Signer override = {}", signerOverride); - awsConf.setSignerOverride(signerOverride); - } - } - - /** - * Initializes the connection protocol settings when connecting to S3 (e.g. - * either HTTP or HTTPS). If secure connections are enabled, this method - * will load the configured SSL providers. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - * - * @throws IOException if there is an error initializing the configured - * {@link javax.net.ssl.SSLSocketFactory} - */ - private static void initProtocolSettings(Configuration conf, - ClientConfiguration awsConf) throws IOException { - boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, - DEFAULT_SECURE_CONNECTIONS); - awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP); - if (secureConnections) { - NetworkBinding.bindSSLChannelMode(conf, awsConf); - } - } - - /** - * Initializes AWS SDK proxy support in the AWS client configuration - * if the S3A settings enable it. - *
    - * Note: LimitedPrivate to provide proxy support in ranger repository. - * - * @param conf Hadoop configuration - * @param bucket Optional bucket to use to look up per-bucket proxy secrets - * @param awsConf AWS SDK configuration to update - * @throws IllegalArgumentException if misconfigured - * @throws IOException problem getting username/secret from password source. - */ - @InterfaceAudience.LimitedPrivate("Ranger") - public static void initProxySupport(Configuration conf, - String bucket, - ClientConfiguration awsConf) throws IllegalArgumentException, - IOException { - String proxyHost = conf.getTrimmed(PROXY_HOST, ""); - int proxyPort = conf.getInt(PROXY_PORT, -1); - if (!proxyHost.isEmpty()) { - awsConf.setProxyHost(proxyHost); - if (proxyPort >= 0) { - awsConf.setProxyPort(proxyPort); - } else { - if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { - LOG.warn("Proxy host set without port. Using HTTPS default 443"); - awsConf.setProxyPort(443); - } else { - LOG.warn("Proxy host set without port. Using HTTP default 80"); - awsConf.setProxyPort(80); - } - } - final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME, - null, null); - final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD, - null, null); - if ((proxyUsername == null) != (proxyPassword == null)) { - String msg = "Proxy error: " + PROXY_USERNAME + " or " + - PROXY_PASSWORD + " set without the other."; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - boolean isProxySecured = conf.getBoolean(PROXY_SECURED, false); - awsConf.setProxyUsername(proxyUsername); - awsConf.setProxyPassword(proxyPassword); - awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN)); - awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); - awsConf.setProxyProtocol(isProxySecured ? Protocol.HTTPS : Protocol.HTTP); - if (LOG.isDebugEnabled()) { - LOG.debug("Using proxy server {}://{}:{} as user {} with password {} " - + "on domain {} as workstation {}", - awsConf.getProxyProtocol(), - awsConf.getProxyHost(), - awsConf.getProxyPort(), - String.valueOf(awsConf.getProxyUsername()), - awsConf.getProxyPassword(), awsConf.getProxyDomain(), - awsConf.getProxyWorkstation()); - } - } else if (proxyPort >= 0) { - String msg = - "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - } - - /** - * Initializes the User-Agent header to send in HTTP requests to AWS - * services. We always include the Hadoop version number. The user also - * may set an optional custom prefix to put in front of the Hadoop version - * number. The AWS SDK internally appends its own information, which seems - * to include the AWS SDK version, OS and JVM version. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration to update - */ - private static void initUserAgent(Configuration conf, - ClientConfiguration awsConf) { - String userAgent = "Hadoop " + VersionInfo.getVersion(); - String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); - if (!userAgentPrefix.isEmpty()) { - userAgent = userAgentPrefix + ", " + userAgent; - } - LOG.debug("Using User-Agent: {}", userAgent); - awsConf.setUserAgentPrefix(userAgent); - } - /** * Convert the data of an iterator of {@link S3AFileStatus} to * an array. 
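For context, the v1 ClientConfiguration plumbing removed above is replaced by builder-based setup in the new AWSClientConfig class (see the end of this patch). A minimal sketch of the v2 equivalents, assuming illustrative timeout and connection values rather than the actual S3A constants:

    import java.time.Duration;

    import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
    import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
    import software.amazon.awssdk.http.apache.ApacheHttpClient;

    public class V2ClientConfigSketch {
      // Rough v2 counterpart of the removed initConnectionSettings()/initUserAgent():
      // request timeouts and the User-Agent prefix move to an override configuration.
      public static ClientOverrideConfiguration overrideConfig() {
        return ClientOverrideConfiguration.builder()
            .apiCallAttemptTimeout(Duration.ofMinutes(1))   // illustrative value
            .putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, "Hadoop")
            .build();
      }

      // Connection pool and socket options now belong to the HTTP client builder.
      public static ApacheHttpClient.Builder httpClient() {
        return ApacheHttpClient.builder()
            .maxConnections(96)                             // illustrative value
            .connectionTimeout(Duration.ofSeconds(5))
            .socketTimeout(Duration.ofSeconds(200));
      }
    }

The v2 SDK deliberately splits what ClientConfiguration bundled: retry/timeout/UA settings live on the override configuration, while pooling, socket and proxy settings live on the HTTP client builder.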
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index c046f1e12633d..fa2c0769d26a3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -26,10 +26,9 @@ import java.util.Map; import java.util.concurrent.Executor; -import com.amazonaws.monitoring.MonitoringListener; -import com.amazonaws.services.s3.AmazonS3; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.transfer.s3.S3TransferManager; @@ -41,7 +40,7 @@ import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; /** - * Factory for creation of {@link AmazonS3} client instances. + * Factory for creation of {@link S3Client} client instances. * Important: HBase's HBoss module implements this interface in its * tests. * Take care when updating this interface to ensure that a client @@ -55,17 +54,6 @@ @InterfaceStability.Evolving public interface S3ClientFactory { - /** - * Creates a new {@link AmazonS3} client. - * - * @param uri S3A file system URI - * @param parameters parameter object - * @return S3 client - * @throws IOException IO problem - */ - AmazonS3 createS3Client(URI uri, - S3ClientCreationParameters parameters) throws IOException; - /** * Creates a new {@link S3Client}. * The client returned supports synchronous operations. For @@ -77,14 +65,14 @@ AmazonS3 createS3Client(URI uri, * @return S3 client * @throws IOException on any IO problem */ - S3Client createS3ClientV2(URI uri, + S3Client createS3Client(URI uri, S3ClientCreationParameters parameters) throws IOException; /** * Creates a new {@link S3AsyncClient}. * The client returned supports asynchronous operations. For * synchronous operations, use - * {@link #createS3ClientV2(URI, S3ClientCreationParameters)}. + * {@link #createS3Client(URI, S3ClientCreationParameters)}. * * @param uri S3A file system URI * @param parameters parameter object @@ -97,13 +85,10 @@ S3AsyncClient createS3AsyncClient(URI uri, /** * Creates a new {@link S3TransferManager}. * - * @param uri S3A file system URI - * @param parameters parameter object + * @param s3AsyncClient the async client to be used by the TM. * @return S3 transfer manager - * @throws IOException on any IO problem */ - S3TransferManager createS3TransferManager(URI uri, - S3ClientCreationParameters parameters) throws IOException; + S3TransferManager createS3TransferManager(S3AsyncClient s3AsyncClient); /** * Settings for the S3 Client. @@ -128,11 +113,6 @@ final class S3ClientCreationParameters { */ private final Map headers = new HashMap<>(); - /** - * Monitoring listener. - */ - private MonitoringListener monitoringListener; - /** * RequestMetricCollector metrics...if not-null will be wrapped * with an {@code AwsStatisticsCollector} and passed to @@ -176,6 +156,12 @@ final class S3ClientCreationParameters { */ private Executor transferManagerExecutor; + /** + * Region of the S3 bucket. + */ + private Region region; + + /** * List of execution interceptors to include in the chain * of interceptors in the SDK. 
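Taken together, the reworked factory surface is driven roughly as follows. A sketch only: fsUri, conf and the credential list are assumed to be in scope, and the region is purely illustrative.

    S3ClientFactory.S3ClientCreationParameters parameters =
        new S3ClientFactory.S3ClientCreationParameters()
            .withCredentialSet(credentials)
            .withRequesterPays(false)
            .withRegion(Region.EU_WEST_2);   // illustrative region

    S3ClientFactory factory = ReflectionUtils.newInstance(DefaultS3ClientFactory.class, conf);
    S3Client s3 = factory.createS3Client(fsUri, parameters);
    S3AsyncClient asyncClient = factory.createS3AsyncClient(fsUri, parameters);
    // The transfer manager is now built from the async client directly.
    S3TransferManager transfers = factory.createS3TransferManager(asyncClient);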
@@ -196,21 +182,6 @@ public S3ClientCreationParameters withExecutionInterceptors(
       return this;
     }

-    public MonitoringListener getMonitoringListener() {
-      return monitoringListener;
-    }
-
-    /**
-     * listener for AWS monitoring events.
-     * @param listener listener
-     * @return this object
-     */
-    public S3ClientCreationParameters withMonitoringListener(
-        @Nullable final MonitoringListener listener) {
-      monitoringListener = listener;
-      return this;
-    }
-
     public StatisticsFromAwsSdk getMetrics() {
       return metrics;
     }
@@ -384,5 +355,25 @@ public S3ClientCreationParameters withTransferManagerExecutor(
       transferManagerExecutor = value;
       return this;
     }
+
+    /**
+     * Set region.
+     *
+     * @param value new value
+     * @return the builder
+     */
+    public S3ClientCreationParameters withRegion(
+        final Region value) {
+      region = value;
+      return this;
+    }
+
+    /**
+     * Get the region.
+     * @return the region
+     */
+    public Region getRegion() {
+      return region;
+    }
   }
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index 651769ff283bd..ae761fe270f46 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -538,6 +538,11 @@ public enum Statistic {
       StoreStatisticNames.STORE_IO_THROTTLE_RATE,
       "Rate of S3 request throttling",
       TYPE_QUANTILE),
+  STORE_REGION_PROBE(
+      StoreStatisticNames.STORE_REGION_PROBE,
+      "Store Region Probe",
+      TYPE_DURATION
+  ),

   /*
    * Delegation Token Operations.
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
index ee3a7c8a7e557..85ed69e14bc79 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
@@ -18,7 +18,7 @@

 package org.apache.hadoop.fs.s3a;

-import software.amazon.awssdk.transfer.s3.FileUpload;
+import software.amazon.awssdk.transfer.s3.model.FileUpload;

 /**
  * Simple struct that contains information about a S3 upload.
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
index 40eec3bed8213..3a710fab74cc3 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
@@ -709,6 +709,7 @@ public interface WriteOperationHelperCallbacks {

   /**
    * Initiates a select request.
* @param request selectObjectContent request + * @param t selectObjectContent request handler * @return selectObjectContentResult */ CompletableFuture selectObjectContent(SelectObjectContentRequest request, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index d05a7dc878a38..c7984aa4e2605 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -29,12 +29,12 @@ import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; @@ -73,7 +73,7 @@ public interface RequestFactory { * Get the canned ACL of this FS. * @return an ACL, if any */ - ObjectCannedACL getCannedACL(); + String getCannedACL(); /** * Get the encryption algorithm of this endpoint. @@ -173,12 +173,19 @@ CompleteMultipartUploadRequest.Builder newCompleteMultipartUploadRequestBuilder( List partETags); /** - * Create a HEAD request builder. + * Create a HEAD object request builder. * @param key key, may have trailing / * @return the request builder. */ HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key); + /** + * Create a HEAD bucket request builder. + * @param bucket bucket to get metadata for + * @return the request builder. + */ + HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucket); + /** * Create a GET request builder. 
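The factory methods above now return SDK v2 request builders rather than mutable v1 request objects, so a caller can adjust a request before sealing it with build(). A sketch, assuming requestFactory, bucket, key and etag are in scope (ifNoneMatch is the standard v2 conditional-read parameter):

    HeadBucketRequest headBucket =
        requestFactory.newHeadBucketRequestBuilder(bucket).build();

    // Caller-side customisation before build(): a conditional HEAD that
    // only returns metadata when the object's etag has changed.
    HeadObjectRequest headObject =
        requestFactory.newHeadObjectRequestBuilder(key)
            .ifNoneMatch(etag)
            .build();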
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java index b48519907b2f9..602e0ef8ecb5b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java @@ -265,8 +265,9 @@ private class LoggingAuditSpan extends AbstractAuditSpanImpl { private void attachRangeFromRequest(SdkHttpRequest request, ExecutionAttributes executionAttributes) { - if (executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME) - .equals("GetObject")) { + String operationName = executionAttributes.getAttribute(AwsExecutionAttribute.OPERATION_NAME); + + if (operationName != null && operationName.equals("GetObject")) { if (request.headers() != null && request.headers().get("Range") != null) { String[] rangeHeader = request.headers().get("Range").get(0).split("="); // only set header if range unit is bytes diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index e493154d85535..3517fabb9579e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -53,7 +53,7 @@ import org.apache.hadoop.security.UserGroupInformation; import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; /** * Support IAM Assumed roles by instantiating an instance of diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java new file mode 100644 index 0000000000000..d94f8c25bad6b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.io.IOException;
+import java.lang.reflect.Modifier;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+
+import com.amazonaws.auth.AWSCredentialsProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AUtils;
+import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider;
+import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
+import org.apache.hadoop.fs.s3native.S3xLoginHelper;
+
+import static org.apache.hadoop.fs.s3a.Constants.AWS_AUTH_CLASS_PREFIX;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER;
+
+/**
+ * This class provides methods to create the list of AWS credential providers.
+ */
+public final class AwsCredentialListProvider {
+
+  private AwsCredentialListProvider() {
+  }
+
+  private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class);
+
+  public static final String NOT_AWS_PROVIDER =
+      "does not implement AWSCredentialsProvider";
+  public static final String NOT_AWS_V2_PROVIDER =
+      "does not implement AwsCredentialsProvider";
+  public static final String ABSTRACT_PROVIDER =
+      "is abstract and therefore cannot be created";
+
+  /**
+   * Error message when the AWS provider list built up contains a forbidden
+   * entry.
+   */
+  @VisibleForTesting
+  public static final String E_FORBIDDEN_AWS_PROVIDER
+      = "AWS provider class cannot be used";
+
+  /**
+   * The standard AWS provider list for AWS connections.
+   */
+  public static final List<Class<?>>
+      STANDARD_AWS_PROVIDERS = Collections.unmodifiableList(
+      Arrays.asList(
+          TemporaryAWSCredentialsProvider.class,
+          SimpleAWSCredentialsProvider.class,
+          EnvironmentVariableCredentialsProvider.class,
+          IAMInstanceCredentialsProvider.class));
+
+  /**
+   * Create the AWS credentials from the providers, the URI and
+   * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration.
+   * @param binding Binding URI -may be null
+   * @param conf filesystem configuration
+   * @return a credentials provider list
+   * @throws IOException Problems loading the providers (including reading
+   * secrets from credential files).
+   */
+  public static AWSCredentialProviderList createAWSCredentialProviderSet(
+      @Nullable URI binding,
+      Configuration conf) throws IOException {
+    // this will reject any user:secret entries in the URI
+    S3xLoginHelper.rejectSecretsInURIs(binding);
+    AWSCredentialProviderList credentials =
+        buildAWSProviderList(binding,
+            conf,
+            AWS_CREDENTIALS_PROVIDER,
+            STANDARD_AWS_PROVIDERS,
+            new HashSet<>());
+    // make sure the logging message strips out any auth details
+    LOG.debug("For URI {}, using credentials {}",
+        binding, credentials);
+    return credentials;
+  }
+
+  /**
+   * Load list of AWS credential provider/credential provider factory classes.
+   * @param conf configuration
+   * @param key key
+   * @param defaultValue list of default values
+   * @return the list of classes, possibly empty
+   * @throws IOException on a failure to load the list.
+   */
+  private static List<Class<?>> loadAWSProviderClasses(Configuration conf,
+      String key,
+      Class<?>... defaultValue) throws IOException {
+    try {
+      return Arrays.asList(conf.getClasses(key, defaultValue));
+    } catch (RuntimeException e) {
+      Throwable c = e.getCause() != null ? e.getCause() : e;
+      throw new IOException("From option " + key + ' ' + c, c);
+    }
+  }
+
+  /**
+   * Maps V1 credential providers to either their equivalent SDK V2 class or hadoop provider.
+   */
+  private static Map<String, Class<?>> initCredentialProvidersMap() {
+    Map<String, Class<?>> v1v2CredentialProviderMap = new HashMap<>();
+
+    v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider",
+        EnvironmentVariableCredentialsProvider.class);
+    v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper",
+        IAMInstanceCredentialsProvider.class);
+    v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider",
+        IAMInstanceCredentialsProvider.class);
+
+    return v1v2CredentialProviderMap;
+  }
+
+  /**
+   * Load list of AWS credential provider/credential provider factory classes;
+   * support a forbidden list to prevent loops, mandate full secrets, etc.
+   * @param binding Binding URI -may be null
+   * @param conf configuration
+   * @param key key
+   * @param forbidden a possibly empty set of forbidden classes.
+   * @param defaultValues list of default providers.
+   * @return the list of classes, possibly empty
+   * @throws IOException on a failure to load the list.
+   */
+  public static AWSCredentialProviderList buildAWSProviderList(
+      @Nullable final URI binding,
+      final Configuration conf,
+      final String key,
+      final List<Class<?>> defaultValues,
+      final Set<Class<?>> forbidden) throws IOException {
+
+    // build up the base provider
+    List<Class<?>> awsClasses = loadAWSProviderClasses(conf,
+        key,
+        defaultValues.toArray(new Class[defaultValues.size()]));
+
+    Map<String, Class<?>> v1v2CredentialProviderMap = initCredentialProvidersMap();
+    // and if the list is empty, switch back to the defaults.
+    // this is to address the issue that configuration.getClasses()
+    // doesn't return the default if the config value is just whitespace.
+    if (awsClasses.isEmpty()) {
+      awsClasses = defaultValues;
+    }
+    // iterate through, checking for blacklists and then instantiating
+    // each provider
+    AWSCredentialProviderList providers = new AWSCredentialProviderList();
+    for (Class<?> aClass : awsClasses) {
+
+      if (forbidden.contains(aClass)) {
+        throw new IOException(E_FORBIDDEN_AWS_PROVIDER
+            + " in option " + key + ": " + aClass);
+      }
+
+      if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) &&
+          aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)) {
+        providers.add(createAWSV2CredentialProvider(conf,
+            v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding));
+      } else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) {
+        providers.add(createAWSV1CredentialProvider(conf,
+            aClass, binding));
+      } else {
+        providers.add(createAWSV2CredentialProvider(conf, aClass, binding));
+      }
+
+    }
+    return providers;
+  }
+
+  /**
+   * Create an AWS credential provider from its class by using reflection. The
+   * class must implement one of the following means of construction, which are
+   * attempted in order:
+   *
+   * <ol>
+   * <li>a public constructor accepting java.net.URI and
+   *     org.apache.hadoop.conf.Configuration</li>
+   * <li>a public constructor accepting
+   *     org.apache.hadoop.conf.Configuration</li>
+   * <li>a public static method named getInstance that accepts no
+   *    arguments and returns an instance of
+   *    com.amazonaws.auth.AWSCredentialsProvider, or</li>
+   * <li>a public default constructor.</li>
+   * </ol>
+   *
+   * @param conf configuration
+   * @param credClass credential class
+   * @param uri URI of the FS
+   * @return the instantiated class
+   * @throws IOException on any instantiation failure.
+   */
+  private static AWSCredentialsProvider createAWSV1CredentialProvider(Configuration conf,
+      Class<?> credClass, @Nullable URI uri) throws IOException {
+    AWSCredentialsProvider credentials = null;
+    String className = credClass.getName();
+    if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
+      throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
+    }
+    if (Modifier.isAbstract(credClass.getModifiers())) {
+      throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
+    }
+    LOG.debug("Credential provider class is {}", className);
+
+    credentials =
+        S3AUtils.getInstanceFromReflection(credClass, conf, uri, AWSCredentialsProvider.class,
+            "getInstance", AWS_CREDENTIALS_PROVIDER);
+    return credentials;
+
+  }
+
+  /**
+   * Create an AWS credential provider from its class by using reflection. The
+   * class must implement one of the following means of construction, which are
+   * attempted in order:
+   *
+   * <ol>
+   * <li>a public constructor accepting java.net.URI and
+   *     org.apache.hadoop.conf.Configuration</li>
+   * <li>a public constructor accepting
+   *     org.apache.hadoop.conf.Configuration</li>
+   * <li>a public static method named create that accepts no
+   *    arguments and returns an instance of
+   *    software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or</li>
+   * <li>a public default constructor.</li>
+   * </ol>
+   *
+   * @param conf configuration
+   * @param credClass credential class
+   * @param uri URI of the FS
+   * @return the instantiated class
+   * @throws IOException on any instantiation failure.
+   */
+  private static AwsCredentialsProvider createAWSV2CredentialProvider(Configuration conf,
+      Class<?> credClass, @Nullable URI uri) throws IOException {
+    AwsCredentialsProvider credentials = null;
+    String className = credClass.getName();
+    if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) {
+      throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER);
+    }
+    if (Modifier.isAbstract(credClass.getModifiers())) {
+      throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
+    }
+    LOG.debug("Credential provider class is {}", className);
+    credentials =
+        S3AUtils.getInstanceFromReflection(credClass, conf, uri, AwsCredentialsProvider.class,
+            "create", AWS_CREDENTIALS_PROVIDER);
+    return credentials;
+  }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java
index 62f6ea845e6bf..dcfc0a7ee8416 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java
@@ -51,6 +51,7 @@

 import static org.apache.commons.lang3.StringUtils.isEmpty;
 import static org.apache.commons.lang3.StringUtils.isNotEmpty;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS;
 import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*;

 /**
@@ -117,7 +118,9 @@ public static StsClientBuilder builder(
    * @param credentials AWS credential chain to use
    * @param stsEndpoint optional endpoint "https://sts.us-west-1.amazonaws.com"
    * @param stsRegion the region, e.g "us-west-1". Must be set if endpoint is.
+   * @param bucket bucket name
    * @return the builder to call {@code build()}
+   * @throws IOException problem reading proxy secrets
    */
   public static StsClientBuilder builder(final AwsCredentialsProvider credentials,
       final Configuration conf, final String stsEndpoint, final String stsRegion,
@@ -127,7 +130,7 @@ public static StsClientBuilder builder(final AwsCredentialsProvider credentials,
     Preconditions.checkArgument(credentials != null, "No credentials");

     final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder =
-        AWSClientConfig.createClientConfigBuilder(conf);
+        AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS);

     final ApacheHttpClient.Builder httpClientBuilder =
         AWSClientConfig.createHttpClientBuilder(conf);
@@ -143,7 +146,6 @@ public static StsClientBuilder builder(final AwsCredentialsProvider credentials,
         .overrideConfiguration(clientOverrideConfigBuilder.build())
         .credentialsProvider(credentials);

-    // TODO: SIGNERS NOT ADDED YET.
     boolean destIsStandardEndpoint = STS_STANDARD.equals(stsEndpoint);
     if (isNotEmpty(stsEndpoint) && !destIsStandardEndpoint) {
       Preconditions.checkArgument(isNotEmpty(stsRegion),
@@ -165,8 +167,6 @@
    */
   private static URI getSTSEndpoint(String endpoint) {
     try {
-      // TODO: The URI builder is currently imported via a shaded dependency. This is due to TM
-      // preview dependency causing some issues.
       return new URIBuilder().setScheme("https").setHost(endpoint).build();
     } catch (URISyntaxException e) {
       throw new IllegalArgumentException(e);
     }
   }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java
new file mode 100644
index 0000000000000..7beabb9fa3c84
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.auth;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.signer.Aws4Signer;
+import software.amazon.awssdk.auth.signer.Aws4UnsignedPayloadSigner;
+import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
+import software.amazon.awssdk.core.signer.NoOpSigner;
+import software.amazon.awssdk.core.signer.Signer;
+
+import org.apache.hadoop.fs.s3a.S3AUtils;
+
+
+/**
+ * Signer factory used to register and create signers.
+ */
+public final class SignerFactory {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SignerFactory.class);
+  public static final String VERSION_FOUR_SIGNER = "AWS4SignerType";
+  public static final String VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER = "AWS4UnsignedPayloadSignerType";
+  public static final String NO_OP_SIGNER = "NoOpSignerType";
+  private static final String S3_V4_SIGNER = "AWSS3V4SignerType";
+
+  private static final Map<String, Class<? extends Signer>> SIGNERS
+      = new ConcurrentHashMap<>();
+
+  static {
+    // Register the standard signer types.
+    SIGNERS.put(VERSION_FOUR_SIGNER, Aws4Signer.class);
+    SIGNERS.put(VERSION_FOUR_UNSIGNED_PAYLOAD_SIGNER, Aws4UnsignedPayloadSigner.class);
+    SIGNERS.put(NO_OP_SIGNER, NoOpSigner.class);
+    SIGNERS.put(S3_V4_SIGNER, AwsS3V4Signer.class);
+  }
+
+
+  private SignerFactory() {
+  }
+
+  /**
+   * Register an implementation class for the given signer type.
+   *
+   * @param signerType The name of the signer type to register.
+   * @param signerClass The class implementing the given signature protocol.
+   */
+  public static void registerSigner(
+      final String signerType,
+      final Class<? extends Signer> signerClass) {
+
+    if (signerType == null) {
+      throw new IllegalArgumentException("signerType cannot be null");
+    }
+    if (signerClass == null) {
+      throw new IllegalArgumentException("signerClass cannot be null");
+    }
+
+    SIGNERS.put(signerType, signerClass);
+  }
+
+  /**
+   * Check if the signer has already been registered.
+ * @param signerType signer to check + */ + public static void verifySignerRegistered(String signerType) { + Class<? extends Signer> signerClass = SIGNERS.get(signerType); + if (signerClass == null) { + throw new IllegalArgumentException("unknown signer type: " + signerType); + } + } + + + /** + * Create an instance of the given signer. + * + * @param signerType The signer type. + * @param configKey Config key used to configure the signer. + * @return The new signer instance. + * @throws IOException on any problem, including an unknown signer type. + */ + public static Signer createSigner(String signerType, String configKey) throws IOException { + Class<?> signerClass = SIGNERS.get(signerType); + if (signerClass == null) { + throw new IOException("unknown signer type: " + signerType); + } + String className = signerClass.getName(); + + LOG.debug("Signer class is {}", className); + + Signer signer = + S3AUtils.getInstanceFromReflection(signerClass, null, null, Signer.class, "create", + configKey); + + return signer; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java index e162428787cc4..198bc66133a95 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerManager.java @@ -22,14 +22,12 @@ import java.util.LinkedList; import java.util.List; -import com.amazonaws.auth.Signer; -import com.amazonaws.auth.SignerFactory; +import software.amazon.awssdk.core.signer.Signer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider; -import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -71,8 +69,6 @@ public void initCustomSigners() { return; } - V2Migration.v1CustomSignerUsed(); - for (String customSigner : customSigners) { String[] parts = customSigner.split(":"); if (!(parts.length == 1 || parts.length == 2 || parts.length == 3)) { @@ -119,7 +115,7 @@ public void initCustomSigners() { private static void maybeRegisterSigner(String signerName, String signerClassName, Configuration conf) { try { - SignerFactory.getSignerByTypeAndService(signerName, null); + SignerFactory.verifySignerRegistered(signerName); } catch (IllegalArgumentException e) { // Signer is not registered with the AWS SDK. // Load the class and register the signer. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java index 0a73411156b6d..bfb7e6966457b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java @@ -42,6 +42,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.DelegationTokenIssuer; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.util.DurationInfo; import static org.apache.hadoop.fs.s3a.Statistic.DELEGATION_TOKENS_ISSUED; @@ -227,10 +228,7 @@ protected void serviceStop() throws Exception { try { super.serviceStop(); } finally { - // TODO: Importing the transfer manager preview outside of the bundle causes some - // issues.
Over here, it can no longer find the LOG. We expect this to be fixed with the - // release of the TM. - // ServiceOperations.stopQuietly(LOG, tokenBinding); + ServiceOperations.stopQuietly(LOG, tokenBinding); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 440f5305af2c2..4b9fd517b264b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -47,8 +47,8 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; import static org.apache.hadoop.fs.s3a.Invoker.once; -import static org.apache.hadoop.fs.s3a.S3AUtils.STANDARD_AWS_PROVIDERS; -import static org.apache.hadoop.fs.s3a.S3AUtils.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromAWSCredentials; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromSTSCredentials; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java index 2f52f3ae9a832..12a89d50f6b0d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSCannedACL.java @@ -22,13 +22,14 @@ * Enum to map AWS SDK V1 Acl values to SDK V2. */ public enum AWSCannedACL { - Private("PRIVATE"), - PublicRead("PUBLIC_READ"), - PublicReadWrite("PUBLIC_READ_WRITE"), - AuthenticatedRead("AUTHENTICATED_READ"), - AwsExecRead("AWS_EXEC_READ"), - BucketOwnerRead("BUCKET_OWNER_READ"), - BucketOwnerFullControl("BUCKET_OWNER_FULL_CONTROL"); + Private("private"), + PublicRead("public-read"), + PublicReadWrite("public-read-write"), + AuthenticatedRead("authenticated-read"), + AwsExecRead("aws-exec-read"), + BucketOwnerRead("bucket-owner-read"), + BucketOwnerFullControl("bucket-owner-full-control"), + LogDeliveryWrite("log-delivery-write"); private final String value; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 55fb1132bb0e7..a69907755eeac 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -32,19 +32,19 @@ import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.http.apache.ProxyConfiguration; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; -// TODO: Update to use the non shaded dependency. There is an issue with the preview version of TM -// which is preventing this, should be resolve with the TM release. 
-import software.amazon.awssdk.thirdparty.org.apache.http.client.utils.URIBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.auth.SignerFactory; import org.apache.hadoop.util.VersionInfo; +import org.apache.http.client.utils.URIBuilder; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT; -import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; @@ -53,10 +53,13 @@ import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD; import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_SECURED; import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME; import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION; import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; -import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_STS; import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; @@ -71,7 +74,8 @@ public final class AWSClientConfig { private AWSClientConfig() { } - public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { + public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf, + String awsServiceIdentifier) throws IOException { ClientOverrideConfiguration.Builder overrideConfigBuilder = ClientOverrideConfiguration.builder(); @@ -79,12 +83,14 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf initUserAgent(conf, overrideConfigBuilder); - // TODO: Look at signers. 
See issue https://github.com/aws/aws-sdk-java-v2/issues/1024 - // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); - // if (!signerOverride.isEmpty()) { - // LOG.debug("Signer override = {}", signerOverride); - // overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER) - // } + String signer = conf.getTrimmed(SIGNING_ALGORITHM, ""); + if (!signer.isEmpty()) { + LOG.debug("Signer override = {}", signer); + overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER, + SignerFactory.createSigner(signer, SIGNING_ALGORITHM)); + } + + initSigner(conf, overrideConfigBuilder, awsServiceIdentifier); return overrideConfigBuilder; } @@ -94,8 +100,10 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf * * @param conf The Hadoop configuration * @return Http client builder + * @throws IOException on any problem */ - public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { + public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) + throws IOException { ApacheHttpClient.Builder httpClientBuilder = ApacheHttpClient.builder(); @@ -109,8 +117,7 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); httpClientBuilder.socketTimeout(Duration.ofSeconds(socketTimeout)); - // TODO: Need to set ssl socket factory, as done in - // NetworkBinding.bindSSLChannelMode(conf, awsConf); + NetworkBinding.bindSSLChannelMode(conf, httpClientBuilder); return httpClientBuilder; } @@ -136,7 +143,7 @@ public static NettyNioAsyncHttpClient.Builder createAsyncHttpClientBuilder(Confi httpClientBuilder.readTimeout(Duration.ofSeconds(socketTimeout)); httpClientBuilder.writeTimeout(Duration.ofSeconds(socketTimeout)); - // TODO: Need to set ssl socket factory, as done in + // TODO: Don't think you can set a socket factory for the netty client. // NetworkBinding.bindSSLChannelMode(conf, awsConf); return httpClientBuilder; @@ -176,14 +183,15 @@ public static ProxyConfiguration createProxyConfiguration(Configuration conf, if (!proxyHost.isEmpty()) { if (proxyPort >= 0) { - proxyConfigBuilder.endpoint(buildURI(proxyHost, proxyPort)); + String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http"; + proxyConfigBuilder.endpoint(buildURI(scheme, proxyHost, proxyPort)); } else { - if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + if (conf.getBoolean(PROXY_SECURED, false)) { LOG.warn("Proxy host set without port. Using HTTPS default 443"); - proxyConfigBuilder.endpoint(buildURI(proxyHost, 443)); + proxyConfigBuilder.endpoint(buildURI("https", proxyHost, 443)); } else { LOG.warn("Proxy host set without port. Using HTTP default 80"); - proxyConfigBuilder.endpoint(buildURI(proxyHost, 80)); + proxyConfigBuilder.endpoint(buildURI("http", proxyHost, 80)); } } final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, @@ -235,17 +243,21 @@ public static ProxyConfiguration createProxyConfiguration(Configuration conf, if (!proxyHost.isEmpty()) { if (proxyPort >= 0) { + String scheme = conf.getBoolean(PROXY_SECURED, false) ? "https" : "http"; proxyConfigBuilder.host(proxyHost); proxyConfigBuilder.port(proxyPort); + proxyConfigBuilder.scheme(scheme); } else { - if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + if (conf.getBoolean(PROXY_SECURED, false)) { LOG.warn("Proxy host set without port. 
Using HTTPS default 443"); proxyConfigBuilder.host(proxyHost); proxyConfigBuilder.port(443); + proxyConfigBuilder.scheme("https"); } else { LOG.warn("Proxy host set without port. Using HTTP default 80"); proxyConfigBuilder.host(proxyHost); proxyConfigBuilder.port(80); + proxyConfigBuilder.scheme("http"); } } final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME, @@ -287,9 +299,9 @@ public static ProxyConfiguration createProxyConfiguration(Configuration conf, + * @param scheme proxy scheme: http or https * @param port proxy port * @return uri with host and port */ - private static URI buildURI(String host, int port) { + private static URI buildURI(String scheme, String host, int port) { try { - return new URIBuilder().setHost(host).setPort(port).build(); + return new URIBuilder().setScheme(scheme).setHost(host).setPort(port).build(); } catch (URISyntaxException e) { String msg = "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT; @@ -319,6 +331,30 @@ private static void initUserAgent(Configuration conf, clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); } + private static void initSigner(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) + throws IOException { + String configKey = null; + switch (awsServiceIdentifier) { + case AWS_SERVICE_IDENTIFIER_S3: + configKey = SIGNING_ALGORITHM_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = SIGNING_ALGORITHM_STS; + break; + default: + // Nothing to do. The original signer override is already set up + } + if (configKey != null) { + String signerOverride = conf.getTrimmed(configKey, ""); + if (!signerOverride.isEmpty()) { + LOG.debug("Signer override for {} = {}", awsServiceIdentifier, signerOverride); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.SIGNER, + SignerFactory.createSigner(signerOverride, configKey)); + } + } + } + /** * Configures request timeout. * diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index e36842c39b731..2c9d6857b46a2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -207,15 +207,13 @@ public void processResponse(final CopyObjectResponse copyObjectResponse) // be the same on the copy. As such, there isn't really anything that // can be verified on the response, except that a revision ID is present // if required. - // TODO: Commenting out temporarily, due to the TM not returning copyObjectResult - // in the response.
-// String newRevisionId = policy.getRevisionId(copyObjectResponse); -// LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); -// if (newRevisionId == null && policy.isRequireVersion()) { -// throw new NoVersionAttributeException(uri, String.format( -// "Change detection policy requires %s", -// policy.getSource())); -// } + String newRevisionId = policy.getRevisionId(copyObjectResponse); + LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); + if (newRevisionId == null && policy.isRequireVersion()) { + throw new NoVersionAttributeException(uri, String.format( + "Change detection policy requires %s", + policy.getSource())); + } } /** @@ -232,9 +230,6 @@ public void processException(SdkException e, String operation) throws RemoteFileChangedException { if (e instanceof AwsServiceException) { AwsServiceException serviceException = (AwsServiceException)e; - // TODO: Verify whether this is fixed in SDK v2. - // In SDK v1, this wasn't really going to be hit due to - // https://github.com/aws/aws-sdk-java/issues/1644 if (serviceException.statusCode() == SC_412_PRECONDITION_FAILED) { versionMismatches.versionMismatchError(); throw new RemoteFileChangedException(uri, operation, String.format( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java index 8205668e8f354..ba21f6028a5f8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ConfigureShadedAWSSocketFactory.java @@ -21,8 +21,8 @@ import javax.net.ssl.HostnameVerifier; import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.thirdparty.apache.http.conn.ssl.SSLConnectionSocketFactory; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.thirdparty.org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; @@ -35,13 +35,12 @@ public class ConfigureShadedAWSSocketFactory implements NetworkBinding.ConfigureAWSSocketFactory { @Override - public void configureSocketFactory(final ClientConfiguration awsConf, + public void configureSocketFactory(final ApacheHttpClient.Builder httpClientBuilder, final DelegatingSSLSocketFactory.SSLChannelMode channelMode) throws IOException { DelegatingSSLSocketFactory.initializeDefaultFactory(channelMode); - awsConf.getApacheHttpClientConfig().setSslSocketFactory( - new SSLConnectionSocketFactory( - DelegatingSSLSocketFactory.getDefaultFactory(), - (HostnameVerifier) null)); + httpClientBuilder.socketFactory(new SSLConnectionSocketFactory( + DelegatingSSLSocketFactory.getDefaultFactory(), + (HostnameVerifier) null)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java index 4926ff13f8d0c..d42dda59caa5f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java @@ -30,6 +30,7 @@ import java.util.TreeMap; import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import 
software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -276,9 +277,28 @@ private Map<String, byte[]> retrieveHeaders( final Statistic statistic) throws IOException { StoreContext context = getStoreContext(); String objectKey = context.pathToKey(path); - HeadObjectResponse md; String symbol = statistic.getSymbol(); S3AStatisticsContext instrumentation = context.getInstrumentation(); + Map<String, byte[]> headers = new TreeMap<>(); + HeadObjectResponse md; + + // Attempting to get metadata for the root, so use head bucket. + if (objectKey.isEmpty()) { + HeadBucketResponse headBucketResponse = + trackDuration(instrumentation, symbol, () -> callbacks.getBucketMetadata()); + + if (headBucketResponse.sdkHttpResponse() != null + && headBucketResponse.sdkHttpResponse().headers() != null + && headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE) != null) { + maybeSetHeader(headers, XA_CONTENT_TYPE, + headBucketResponse.sdkHttpResponse().headers().get(AWSHeaders.CONTENT_TYPE).get(0)); + } + + maybeSetHeader(headers, XA_CONTENT_LENGTH, 0); + + return headers; + } + try { md = trackDuration(instrumentation, symbol, () -> callbacks.getObjectMetadata(objectKey)); @@ -289,7 +309,6 @@ private Map<String, byte[]> retrieveHeaders( } // all user metadata Map<String, String> rawHeaders = md.metadata(); - Map<String, byte[]> headers = new TreeMap<>(); rawHeaders.forEach((key, value) -> headers.put(XA_HEADER_PREFIX + key, encodeBytes(value))); @@ -346,9 +365,7 @@ private Map<String, byte[]> retrieveHeaders( md.serverSideEncryptionAsString()); maybeSetHeader(headers, XA_STORAGE_CLASS, md.storageClassAsString()); - // TODO: check this, looks wrong. - // maybeSetHeader(headers, XA_STORAGE_CLASS, -// md.getReplicationStatus()); + return headers; } @@ -525,5 +542,14 @@ public interface HeaderProcessingCallbacks { */ @Retries.RetryTranslated HeadObjectResponse getObjectMetadata(String key) throws IOException; + + /** + * Retrieve the bucket metadata. + * + * @return metadata + * @throws IOException IO and object access problems. + */ + @Retries.RetryTranslated + HeadBucketResponse getBucketMetadata() throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java index 575a3d1b2de81..34b4049b06123 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java @@ -23,9 +23,9 @@ import java.net.URI; import java.net.URISyntaxException; -import com.amazonaws.ClientConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.apache.ApacheHttpClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; @@ -56,13 +56,12 @@ private NetworkBinding() { * so as to avoid * @param conf the {@link Configuration} used to get the client specified * value of {@code SSL_CHANNEL_MODE} - * @param awsConf the {@code ClientConfiguration} to set the - * SSLConnectionSocketFactory for. + * @param httpClientBuilder the http client builder. * @throws IOException if there is an error while initializing the * {@code SSLSocketFactory} other than classloader problems.
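+ * The binding class is loaded by reflection so that the shaded SDK classes are linked only when needed.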
*/ public static void bindSSLChannelMode(Configuration conf, - ClientConfiguration awsConf) throws IOException { + ApacheHttpClient.Builder httpClientBuilder) throws IOException { // Validate that SSL_CHANNEL_MODE is set to a valid value. String channelModeString = conf.getTrimmed( @@ -89,7 +88,7 @@ public static void bindSSLChannelMode(Configuration conf, (Class<? extends ConfigureAWSSocketFactory>) Class.forName(BINDING_CLASSNAME); clazz.getConstructor() .newInstance() - .configureSocketFactory(awsConf, channelMode); + .configureSocketFactory(httpClientBuilder, channelMode); } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException | LinkageError e) { @@ -103,7 +102,7 @@ public static void bindSSLChannelMode(Configuration conf, * works with the shaded AWS libraries to exist in their own class. */ interface ConfigureAWSSocketFactory { - void configureSocketFactory(ClientConfiguration awsConf, + void configureSocketFactory(ApacheHttpClient.Builder httpClientBuilder, DelegatingSSLSocketFactory.SSLChannelMode channelMode) throws IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java new file mode 100644 index 0000000000000..5e4c3cf37e5ce --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListener.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Interface for progress listeners to implement. + */ +public interface ProgressListener { + default void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) {} +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java new file mode 100644 index 0000000000000..f3f9fb61e434d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ProgressListenerEvent.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +/** + * Enum for progress listener events. + */ +public enum ProgressListenerEvent { + REQUEST_BYTE_TRANSFER_EVENT, + TRANSFER_PART_STARTED_EVENT, + TRANSFER_PART_COMPLETED_EVENT, + TRANSFER_PART_FAILED_EVENT; +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index bbf0c384456ee..cacbee381bebc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -34,13 +34,13 @@ import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.MetadataDirective; -import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; @@ -99,7 +99,7 @@ public class RequestFactoryImpl implements RequestFactory { /** * ACL For new objects. */ - private final ObjectCannedACL cannedACL; + private final String cannedACL; /** * Max number of multipart entries allowed in a large @@ -161,7 +161,7 @@ private T prepareRequest(T t) { * @return an ACL, if any */ @Override - public ObjectCannedACL getCannedACL() { + public String getCannedACL() { return cannedACL; } @@ -207,7 +207,6 @@ public StorageClass getStorageClass() { */ protected void uploadPartEncryptionParameters( UploadPartRequest.Builder builder) { // need to set key to get objects encrypted with SSE_C EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) @@ -236,7 +235,7 @@ public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, Map<String, String> dstom = new HashMap<>(); HeaderProcessing.cloneObjectMetadata(srcom, dstom, copyObjectRequestBuilder); - copyEncryptionParameters(copyObjectRequestBuilder); + copyEncryptionParameters(srcom, copyObjectRequestBuilder); copyObjectRequestBuilder .metadata(dstom) @@ -257,11 +256,22 @@ public CopyObjectRequest.Builder newCopyObjectRequestBuilder(String srcKey, * Propagate encryption parameters from source file if set else use the * current filesystem encryption settings.
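+ * A KMS key on the source object takes priority over the filesystem's encryption settings.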
* @param copyObjectRequestBuilder copy object request builder. + * @param srcom source object metadata. */ - protected void copyEncryptionParameters(CopyObjectRequest.Builder copyObjectRequestBuilder) { + protected void copyEncryptionParameters(HeadObjectResponse srcom, + CopyObjectRequest.Builder copyObjectRequestBuilder) { final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); + String sourceKMSId = srcom.ssekmsKeyId(); + if (isNotEmpty(sourceKMSId)) { + // source KMS ID is propagated + LOG.debug("Propagating SSE-KMS settings from source {}", + sourceKMSId); + copyObjectRequestBuilder.ssekmsKeyId(sourceKMSId); + return; + } + if (S3AEncryptionMethods.SSE_S3 == algorithm) { copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod()); } else if (S3AEncryptionMethods.SSE_KMS == algorithm) { @@ -474,6 +484,15 @@ public HeadObjectRequest.Builder newHeadObjectRequestBuilder(String key) { return prepareRequest(headObjectRequestBuilder); } + @Override + public HeadBucketRequest.Builder newHeadBucketRequestBuilder(String bucketName) { + + HeadBucketRequest.Builder headBucketRequestBuilder = + HeadBucketRequest.builder().bucket(bucketName); + + return prepareRequest(headBucketRequestBuilder); + } + @Override public GetObjectRequest.Builder newGetObjectRequestBuilder(String key) { GetObjectRequest.Builder builder = GetObjectRequest.builder() @@ -613,7 +632,7 @@ public static final class RequestFactoryBuilder { /** * ACL For new objects. */ - private ObjectCannedACL cannedACL = null; + private String cannedACL = null; /** Content Encoding. */ private String contentEncoding; @@ -696,7 +715,7 @@ public RequestFactoryBuilder withEncryptionSecrets( * @return the builder */ public RequestFactoryBuilder withCannedACL( - final ObjectCannedACL value) { + final String value) { cannedACL = value; return this; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index d1647fb3b2f54..d233081ee6851 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -64,7 +64,7 @@ public void setup() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); - s3 = fs.getAmazonS3V2ClientForTesting("mocking"); + s3 = fs.getAmazonS3ClientForTesting("mocking"); } public Configuration createConfiguration() { @@ -79,6 +79,8 @@ public Configuration createConfiguration() { // assertions to be safely made without worrying // about any race conditions conf.setInt(ASYNC_DRAIN_THRESHOLD, Integer.MAX_VALUE); + // set the region to avoid the getBucketLocation call on FS init.
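+ // any valid region string works here: the mocked client never issues network calls.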
+ conf.set(AWS_REGION, "eu-west-1"); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index 9485202f64cb4..2507ae2f5104f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -125,11 +125,12 @@ private Configuration createConfigurationWithProbe(final int probe) { Configuration conf = new Configuration(getFileSystem().getConf()); S3ATestUtils.disableFilesystemCaching(conf); conf.setInt(S3A_BUCKET_PROBE, probe); + conf.set(AWS_REGION, "eu-west-1"); return conf; } @Test - public void testBucketProbingV1() throws Exception { + public void testBucketProbing() throws Exception { describe("Test the V1 bucket probe"); Configuration configuration = createConfigurationWithProbe(1); expectUnknownStore( @@ -137,18 +138,24 @@ public void testBucketProbingV1() throws Exception { } @Test - public void testBucketProbingV2() throws Exception { - describe("Test the V2 bucket probe"); + public void testBucketProbing2() throws Exception { + describe("Test the bucket probe with probe value set to 2"); Configuration configuration = createConfigurationWithProbe(2); + expectUnknownStore( () -> FileSystem.get(uri, configuration)); - /* - * Bucket probing should also be done when value of - * S3A_BUCKET_PROBE is greater than 2. - */ - configuration.setInt(S3A_BUCKET_PROBE, 3); - expectUnknownStore( - () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbing3() throws Exception { + describe("Test the bucket probe with probe value set to 3"); + Configuration configuration = createConfigurationWithProbe(3); + fs = FileSystem.get(uri, configuration); + Path root = new Path(uri); + + assertTrue("root path should always exist", fs.exists(root)); + assertTrue("getFileStatus on root should always return a directory", + fs.getFileStatus(root).isDirectory()); } @Test @@ -162,8 +169,8 @@ public void testBucketProbingParameterValidation() throws Exception { } @Test - public void testAccessPointProbingV2() throws Exception { - describe("Test V2 bucket probing using an AccessPoint ARN"); + public void testAccessPointProbing2() throws Exception { + describe("Test bucket probing using probe value 2, and an AccessPoint ARN"); Configuration configuration = createArnConfiguration(); String accessPointArn = "arn:aws:s3:eu-west-1:123456789012:accesspoint/" + randomBucket; configuration.set(String.format(InternalConstants.ARN_BUCKET_OPTION, randomBucket), @@ -175,7 +182,7 @@ public void testAccessPointProbingV2() throws Exception { @Test public void testAccessPointRequired() throws Exception { - describe("Test V2 bucket probing with 'fs.s3a.accesspoint.required' property."); + describe("Test bucket probing with 'fs.s3a.accesspoint.required' property."); Configuration configuration = createArnConfiguration(); configuration.set(AWS_S3_ACCESSPOINT_REQUIRED, "true"); intercept(PathIOException.class, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index 1b65b5ded9ae9..7ffb16833e044 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -57,8 +57,6 @@ protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, CANNED_ACL); - // TODO: Check why we need this ACL? V2 does not have a LOG_DELIVERY_WRITE ACL which causes - // this test to fail. conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); // needed because of direct calls made conf.setBoolean(S3AAuditConstants.REJECT_OUT_OF_SPAN_OPERATIONS, false); @@ -92,7 +90,7 @@ private void assertObjectHasLoggingGrant(Path path, boolean isFile) { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("acls"); + S3Client s3 = fs.getAmazonS3ClientForTesting("acls"); String key = storeContext.pathToKey(path); if (!isFile) { key = key + "/"; @@ -105,8 +103,6 @@ private void assertObjectHasLoggingGrant(Path path, boolean isFile) { for (Grant grant : grants) { LOG.info("{}", grant.toString()); } - // TODO: Review whether this test is required in v2. - // Reproduces v1's GroupGrantee.LogDelivery Grant loggingGrant = Grant.builder() .grantee(Grantee.builder() .type(Type.GROUP) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index ad2c16bae1d9e..f7bdaa62422ed 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -18,11 +18,17 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.ClientConfiguration; import software.amazon.awssdk.core.client.config.SdkClientConfiguration; import software.amazon.awssdk.core.client.config.SdkClientOption; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.model.StsException; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.reflect.FieldUtils; @@ -30,11 +36,11 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.auth.STSClientFactory; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.test.GenericTestUtils; import org.assertj.core.api.Assertions; -import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -54,7 +60,6 @@ import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; - import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; @@ -113,7 +118,7 @@ public void testEndpoint() throws Exception { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("test endpoint"); + S3Client s3 = fs.getAmazonS3ClientForTesting("test endpoint"); String endPointRegion = ""; // 
Differentiate handling of "s3-" and "s3." based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -353,7 +358,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() try { fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("configuration"); + S3Client s3 = fs.getAmazonS3ClientForTesting("configuration"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, @@ -388,7 +393,7 @@ public void testDefaultUserAgent() throws Exception { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("User Agent"); + S3Client s3 = fs.getAmazonS3ClientForTesting("User Agent"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); @@ -403,7 +408,7 @@ public void testCustomUserAgent() throws Exception { conf.set(Constants.USER_AGENT_PREFIX, "MyApp"); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("User agent"); + S3Client s3 = fs.getAmazonS3ClientForTesting("User agent"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); @@ -417,7 +422,7 @@ public void testRequestTimeout() throws Exception { conf = new Configuration(); conf.set(REQUEST_TIMEOUT, "120"); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("Request timeout (ms)"); + S3Client s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); assertEquals("Configured " + REQUEST_TIMEOUT + @@ -525,37 +530,74 @@ public void testConfOptionPropagationToFS() throws Exception { @Test(timeout = 10_000L) public void testS3SpecificSignerOverride() throws IOException { - ClientConfiguration clientConfiguration = null; - Configuration config; - - String signerOverride = "testSigner"; - String s3SignerOverride = "testS3Signer"; - - // Default SIGNING_ALGORITHM, overridden for S3 only - config = new Configuration(); - config.set(SIGNING_ALGORITHM_S3, s3SignerOverride); - - // TODO: update during signer work. 
- clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_S3); - Assert.assertEquals(s3SignerOverride, - clientConfiguration.getSignerOverride()); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_STS); - Assert.assertNull(clientConfiguration.getSignerOverride()); - - // Configured base SIGNING_ALGORITHM, overridden for S3 only - config = new Configuration(); - config.set(SIGNING_ALGORITHM, signerOverride); - config.set(SIGNING_ALGORITHM_S3, s3SignerOverride); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_S3); - Assert.assertEquals(s3SignerOverride, - clientConfiguration.getSignerOverride()); - clientConfiguration = S3AUtils - .createAwsConf(config, "dontcare", AWS_SERVICE_IDENTIFIER_STS); - Assert - .assertEquals(signerOverride, clientConfiguration.getSignerOverride()); + Configuration config = new Configuration(); + + config.set(CUSTOM_SIGNERS, + "CustomS3Signer:" + CustomS3Signer.class.getName() + ",CustomSTSSigner:" + + CustomSTSSigner.class.getName()); + + config.set(SIGNING_ALGORITHM_S3, "CustomS3Signer"); + config.set(SIGNING_ALGORITHM_STS, "CustomSTSSigner"); + + config.set(AWS_REGION, "eu-west-1"); + fs = S3ATestUtils.createTestFileSystem(config); + + S3Client s3Client = fs.getAmazonS3ClientForTesting("testS3SpecificSignerOverride"); + + StsClient stsClient = + STSClientFactory.builder(config, fs.getBucket(), new AnonymousAWSCredentialsProvider(), "", + "").build(); + + try { + stsClient.getSessionToken(); + } catch (StsException exception) { + // Expected 403, as credentials are not provided. + } + + try { + s3Client.headBucket(HeadBucketRequest.builder().bucket(fs.getBucket()).build()); + } catch (S3Exception exception) { + // Expected 403, as credentials are not provided. 
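+ // the rejected request still passes through the signer, so the flag checked below is set.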
+ } + + Assertions.assertThat(CustomS3Signer.isS3SignerCalled()) + .describedAs("Custom S3 signer not called").isTrue(); + + Assertions.assertThat(CustomSTSSigner.isSTSSignerCalled()) + .describedAs("Custom STS signer not called").isTrue(); + } + + public static final class CustomS3Signer implements Signer { + + private static boolean s3SignerCalled = false; + + @Override + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { + LOG.debug("Custom S3 signer called"); + s3SignerCalled = true; + return request; + } + + public static boolean isS3SignerCalled() { + return s3SignerCalled; + } } + public static final class CustomSTSSigner implements Signer { + + private static boolean stsSignerCalled = false; + + @Override + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { + LOG.debug("Custom STS signer called"); + stsSignerCalled = true; + return request; + } + + public static boolean isSTSSignerCalled() { + return stsSignerCalled; + } + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java index 64e37bf832b87..45b0c6c206f2d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java @@ -63,7 +63,7 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption { private static final String SERVICE_AMAZON_S3_STATUS_CODE_403 - = "Service: Amazon S3; Status Code: 403;"; + = "Service: S3, Status Code: 403"; private static final String KEY_1 = "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs="; private static final String KEY_2 diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index add6502d7da71..c957ab7b6a438 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -21,21 +21,27 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.util.AwsHostNameUtils; import org.assertj.core.api.Assertions; +import org.junit.Assert; import org.junit.Test; +import software.amazon.awssdk.awscore.AwsExecutionAttribute; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.interceptor.Context; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; -import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; -import static 
org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -44,114 +50,104 @@ */ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { - private static final String AWS_REGION_TEST = "test-region"; private static final String AWS_ENDPOINT_TEST = "test-endpoint"; - private static final String AWS_ENDPOINT_TEST_WITH_REGION = - "test-endpoint.some-region.amazonaws.com"; - public static final String MARS_NORTH_2 = "mars-north-2"; - /** - * Test to verify that setting a region with the config would bypass the - * construction of region from endpoint. - */ - @Test - public void testWithRegionConfig() { - getFileSystem().getConf().set(AWS_REGION, AWS_REGION_TEST); - - //Creating an endpoint config with a custom endpoint. - AwsClientBuilder.EndpointConfiguration epr = createEpr(AWS_ENDPOINT_TEST, - getFileSystem().getConf().getTrimmed(AWS_REGION)); - //Checking if setting region config bypasses the endpoint region. - Assertions.assertThat(epr.getSigningRegion()) - .describedAs("There is a region mismatch") - .isEqualTo(getFileSystem().getConf().get(AWS_REGION)); - } /** - * Test to verify that not setting the region config, would lead to using - * endpoint to construct the region. + * Test to verify that when the region config is not set, the client factory makes + * a HEAD bucket call to determine the correct region. If an incorrect region were + * configured, the HEAD bucket call in this test would raise an exception. */ @Test - public void testWithoutRegionConfig() { - getFileSystem().getConf().unset(AWS_REGION); - - //Creating an endpoint config with a custom endpoint containing a region. - AwsClientBuilder.EndpointConfiguration eprRandom = - createEpr(AWS_ENDPOINT_TEST_WITH_REGION, - getFileSystem().getConf().getTrimmed(AWS_REGION)); - String regionFromEndpoint = - AwsHostNameUtils - .parseRegionFromAwsPartitionPattern(AWS_ENDPOINT_TEST_WITH_REGION); - //Checking if not setting region config leads to constructing the region - // from endpoint. - Assertions.assertThat(eprRandom.getSigningRegion()) - .describedAs("There is a region mismatch") - .isNotEqualTo(getFileSystem().getConf().get(AWS_REGION)) - .isEqualTo(regionFromEndpoint); - } + public void testWithoutRegionConfig() throws IOException { + Configuration conf = getConfiguration(); + String bucket = getFileSystem().getBucket(); + conf.unset(String.format("fs.s3a.bucket.%s.endpoint.region", bucket)); + conf.unset(AWS_REGION); - /** - * Method to create EndpointConfiguration using an endpoint. - * - * @param endpoint the endpoint to be used for EndpointConfiguration creation. - * @return an instance of EndpointConfiguration.
- */ - private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint, - String awsRegion) { - return DefaultS3ClientFactory.createEndpointConfiguration(endpoint, - new ClientConfiguration(), awsRegion); + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(getFileSystem().getUri(), conf); + + try { + fs.getBucketMetadata(); + } catch (S3Exception exception) { + if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { + Assert.fail(exception.toString()); + } + } + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Region is not configured, region probe should have been made").isEqualTo(1); } @Test - public void testInvalidRegionDefaultEndpoint() throws Throwable { - describe("Create a client with an invalid region and the default endpoint"); + public void testWithRegionConfig() throws IOException, URISyntaxException { Configuration conf = getConfiguration(); - // we are making a big assumption about the timetable for AWS - // region rollout. - // if this test ever fails because this region now exists - // -congratulations! - conf.set(AWS_REGION, MARS_NORTH_2); - createMarsNorth2Client(conf); + conf.set(AWS_REGION, "us-east-2"); + + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(new URI("s3a://landsat-pds"), conf); + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Region is configured, region probe should not have been made").isEqualTo(0); } @Test - public void testUnsetRegionDefaultEndpoint() throws Throwable { - describe("Create a client with no region and the default endpoint"); + public void testRegionCache() throws IOException, URISyntaxException { Configuration conf = getConfiguration(); conf.unset(AWS_REGION); - createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION); + conf.unset("fs.s3a.bucket.landsat-pds.endpoint.region"); + S3AFileSystem fs = new S3AFileSystem(); + + fs.initialize(new URI("s3a://landsat-pds"), conf); + + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Incorrect number of calls made to get bucket region").isEqualTo(1); + + fs.initialize(new URI("s3a://landsat-pds"), conf); + + // value should already be cached. + Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) + .describedAs("Incorrect number of calls made to get bucket region").isEqualTo(0); } - /** - * By setting the system property {@code "aws.region"} we can - * guarantee that the SDK region resolution chain will always succeed - * (and fast). - * Clearly there is no validation of the region during the build process. - */ @Test - public void testBlankRegionTriggersSDKResolution() throws Throwable { - describe("Create a client with a blank region and the default endpoint." 
- + " This will trigger the SDK Resolution chain"); + public void testEndpointOverride() throws Throwable { + describe("Create a client with no region and the default endpoint"); Configuration conf = getConfiguration(); - conf.set(AWS_REGION, ""); - System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2); - try { - createMarsNorth2Client(conf); - } finally { - System.clearProperty(AWS_REGION_SYSPROP); - } + + S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST); + + intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( + HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); } - /** - * Create an S3 client bonded to an invalid region; - * verify that calling {@code getRegion()} triggers - * a failure. - * @param conf configuration to use in the building. - */ - private void createMarsNorth2Client(Configuration conf) throws Exception { - AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2); - intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion); + + class RegionInterceptor implements ExecutionInterceptor { + private boolean endpointOverridden; + + RegionInterceptor(boolean endpointOverridden) { + this.endpointOverridden = endpointOverridden; + } + + @Override + public void beforeExecution(Context.BeforeExecution context, + ExecutionAttributes executionAttributes) { + + if (endpointOverridden) { + Assertions.assertThat( + executionAttributes.getAttribute(AwsExecutionAttribute.ENDPOINT_OVERRIDDEN)) + .describedAs("Endpoint not overridden").isTrue(); + + Assertions.assertThat( + executionAttributes.getAttribute(AwsExecutionAttribute.CLIENT_ENDPOINT).toString()) + .describedAs("There is an endpoint mismatch").isEqualTo("https://" + AWS_ENDPOINT_TEST); + } + + // We don't actually want to make a request, so exit early. + throw AwsServiceException.builder().message("Exception thrown by interceptor").build(); + } } /** @@ -160,16 +156,23 @@ private void createMarsNorth2Client(Configuration conf) throws Exception { * value. * @param conf configuration to use. * @param endpoint endpoint. - * @param expectedRegion expected region * @return the client. * @throws URISyntaxException parse problems. 
* @throws IOException IO problems */ @SuppressWarnings("deprecation") - private AmazonS3 createS3Client(Configuration conf, - String endpoint, - String expectedRegion) - throws URISyntaxException, IOException { + private S3Client createS3Client(Configuration conf, + String endpoint) + throws IOException { + + boolean endpointOverridden = false; + + if (endpoint != null && !endpoint.isEmpty()) { + endpointOverridden = true; + } + + List<ExecutionInterceptor> interceptors = new ArrayList<>(); + interceptors.add(new RegionInterceptor(endpointOverridden)); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); @@ -177,16 +180,14 @@ private AmazonS3 createS3Client(Configuration conf, S3ClientFactory.S3ClientCreationParameters parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(new AnonymousAWSCredentialsProvider()) - .withPathUri(new URI("s3a://localhost/")) .withEndpoint(endpoint) .withMetrics(new EmptyS3AStatisticsContext() - .newStatisticsFromAwsSdk()); - AmazonS3 client = factory.createS3Client( - new URI("s3a://localhost/"), + .newStatisticsFromAwsSdk()) + .withExecutionInterceptors(interceptors); + + S3Client client = factory.createS3Client( + getFileSystem().getUri(), parameters); - Assertions.assertThat(client.getRegionName()) - .describedAs("Client region name") - .isEqualTo(expectedRegion); return client; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 28625e5755d18..6e85f6bc783dc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -411,7 +411,7 @@ private static <T> T verifyNoTrailingSlash(String role, T o) { */ private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("check default encryption"); + S3Client s3 = fs.getAmazonS3ClientForTesting("check default encryption"); try { return Invoker.once("getBucketEncryption()", fs.getBucket(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index e16a99be8e7c8..0c61caacd055b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -20,44 +20,38 @@ import static org.mockito.Mockito.*; -import java.io.IOException; import java.net.URI; import java.util.ArrayList; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.MultipartUploadListing; -import com.amazonaws.services.s3.model.Region; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsRequest; +import software.amazon.awssdk.services.s3.model.ListMultipartUploadsResponse; import software.amazon.awssdk.transfer.s3.S3TransferManager; /** - * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3} + * An {@link
S3ClientFactory} that returns Mockito mocks of the {@link S3Client} * interface suitable for unit testing. */ public class MockS3ClientFactory implements S3ClientFactory { - // TODO: This will be removed when we remove this method for the client factory. - @Override - public AmazonS3 createS3Client(URI uri, - final S3ClientCreationParameters parameters) { - AmazonS3 s3 = mock(AmazonS3.class); - String bucket = uri.getHost(); - when(s3.doesBucketExist(bucket)).thenReturn(true); - when(s3.doesBucketExistV2(bucket)).thenReturn(true); - // this listing is used in startup if purging is enabled, so - // return a stub value - MultipartUploadListing noUploads = new MultipartUploadListing(); - noUploads.setMultipartUploads(new ArrayList<>(0)); - when(s3.listMultipartUploads(any())).thenReturn(noUploads); - when(s3.getBucketLocation(anyString())).thenReturn(Region.US_West.toString()); - return s3; - } - //TODO: This is incomplete, add in mocks as we update operations @Override - public S3Client createS3ClientV2(URI uri, final S3ClientCreationParameters parameters) { + public S3Client createS3Client(URI uri, final S3ClientCreationParameters parameters) { S3Client s3 = mock(S3Client.class); + // this listing is used in startup if purging is enabled, so + // return a stub value + ListMultipartUploadsResponse noUploads = ListMultipartUploadsResponse.builder() + .uploads(new ArrayList<>(0)) + .isTruncated(false) + .build(); + when(s3.listMultipartUploads((ListMultipartUploadsRequest) any())).thenReturn(noUploads); + when(s3.getBucketLocation((GetBucketLocationRequest) any())).thenReturn( + GetBucketLocationResponse.builder().locationConstraint(Region.US_WEST_2.toString()) + .build()); return s3; } @@ -68,8 +62,7 @@ public S3AsyncClient createS3AsyncClient(URI uri, final S3ClientCreationParamete } @Override - public S3TransferManager createS3TransferManager(URI uri, S3ClientCreationParameters parameters) - throws IOException { + public S3TransferManager createS3TransferManager(S3AsyncClient s3AsyncClient) { S3TransferManager tm = mock(S3TransferManager.class); return tm; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 957db1a038f4b..1eee096180b16 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -34,11 +34,14 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.ContainerCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; + +import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.util.Sets; import org.junit.Rule; import org.junit.Test; @@ -55,6 +58,11 @@ import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ABSTRACT_PROVIDER; +import static 
org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.NOT_AWS_V2_PROVIDER;
+import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS;
+import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList;
+import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet;
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture;
 import static org.junit.Assert.*;
@@ -62,7 +70,6 @@
 /**
  * Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic.
  */
-// TODO: Add new tests that use a mix of V1 and V2 providers and assert that everything works ok.
 public class TestS3AAWSCredentialsProvider {
 
   /**
@@ -149,6 +156,27 @@ public void testDefaultChainNoURI() throws Exception {
         createAWSCredentialProviderSet(null, conf));
   }
 
+  @Test
+  public void testConfiguredChainV1V2() throws Exception {
+    URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2");
+    List<Class<?>> credentialProviders =
+        Arrays.asList(
+            ContainerCredentialsProvider.class,
+            AnonymousAWSCredentialsProvider.class);
+    List<Class<?>> expectedClasses =
+        Arrays.asList(
+            V1V2AwsCredentialProviderAdapter.class,
+            AnonymousAWSCredentialsProvider.class);
+    Configuration conf =
+        createProviderConfiguration(buildClassListString(credentialProviders));
+    AWSCredentialProviderList list1 = createAWSCredentialProviderSet(
+        uri1, conf);
+    AWSCredentialProviderList list2 = createAWSCredentialProviderSet(
+        uri2, conf);
+    assertCredentialProviders(expectedClasses, list1);
+    assertCredentialProviders(expectedClasses, list2);
+  }
+
   @Test
   public void testConfiguredChain() throws Exception {
     URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2");
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java
index a89f1744fd2f9..1f85c8fdef304 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java
@@ -75,7 +75,7 @@ public void testDeleteOnExit() throws Exception {
     // unset S3CSE property from config to avoid pathIOE.
     conf.unset(Constants.S3_ENCRYPTION_ALGORITHM);
     testFs.initialize(uri, conf);
-    S3Client testS3 = testFs.getAmazonS3V2ClientForTesting("mocking");
+    S3Client testS3 = testFs.getAmazonS3ClientForTesting("mocking");
     Path path = new Path("/file");
     String key = path.toUri().getPath().substring(1);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
index fd186e51427cc..33e6b4a08145c 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
@@ -30,14 +30,13 @@
 import java.io.InterruptedIOException;
 import java.net.SocketTimeoutException;
 import java.nio.file.AccessDeniedException;
-import java.util.Collections;
-import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.function.Consumer;
 
 import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
 import software.amazon.awssdk.awscore.exception.AwsServiceException;
 import software.amazon.awssdk.core.exception.SdkException;
+import software.amazon.awssdk.http.SdkHttpResponse;
 import software.amazon.awssdk.services.s3.model.S3Exception;
 import org.junit.Test;
 
@@ -61,21 +60,26 @@ public class TestS3AExceptionTranslation {
       = new SocketTimeoutException("socket");
 
   @Test
-  public void test301ContainsEndpoint() throws Exception {
-    String bucket = "bucket.s3-us-west-2.amazonaws.com";
-    S3Exception s3Exception = createS3Exception("wrong endpoint",
+  public void test301ContainsRegion() throws Exception {
+    String region = "us-west-1";
+
+    AwsErrorDetails redirectError = AwsErrorDetails.builder()
+        .sdkHttpResponse(
+            SdkHttpResponse.builder().putHeader(BUCKET_REGION_HEADER, region).build())
+        .build();
+
+    S3Exception s3Exception = createS3Exception("wrong region",
         SC_301_MOVED_PERMANENTLY,
-        Collections.singletonMap(S3AUtils.ENDPOINT_KEY,
-            bucket));
+        redirectError);
     AWSRedirectException ex = verifyTranslated(
         AWSRedirectException.class, s3Exception);
     assertStatusCode(SC_301_MOVED_PERMANENTLY, ex);
     assertNotNull(ex.getMessage());
-    assertContained(ex.getMessage(), bucket);
-    assertContained(ex.getMessage(), ENDPOINT);
-    assertExceptionContains(ENDPOINT, ex, "endpoint");
-    assertExceptionContains(bucket, ex, "bucket name");
+    assertContained(ex.getMessage(), region);
+    assertContained(ex.getMessage(), AWS_REGION);
+    assertExceptionContains(AWS_REGION, ex, "region");
+    assertExceptionContains(region, ex, "region name");
   }
 
   protected void assertContained(String text, String contained) {
@@ -189,16 +193,13 @@ private static S3Exception createS3Exception(int code) {
   }
 
   private static S3Exception createS3Exception(String message, int code,
-      Map<String, String> additionalDetails) {
+      AwsErrorDetails additionalDetails) {
+
     S3Exception source = (S3Exception) S3Exception.builder()
         .message(message)
         .statusCode(code)
+        .awsErrorDetails(additionalDetails)
         .build();
-    // TODO: is there an equivalent for v2?
- // currently used to retrieve endpoint on redirect - // see S3AUtils.translateException and - // https://github.com/aws/aws-sdk-java-v2/issues/3048 - // source.setAdditionalDetails(additionalDetails); return source; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java index e05ee25adfa74..0982c8cbd4761 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AProxy.java @@ -20,18 +20,17 @@ import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; import org.assertj.core.api.Assertions; import org.junit.Test; +import software.amazon.awssdk.http.apache.ProxyConfiguration; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.test.AbstractHadoopTestBase; import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; import static org.apache.hadoop.fs.s3a.Constants.PROXY_SECURED; -import static org.apache.hadoop.fs.s3a.S3AUtils.initProxySupport; /** * Tests to verify {@link S3AUtils} translates the proxy configurations @@ -79,11 +78,16 @@ public void testProxyDefault() throws IOException { private void verifyProxy(Configuration proxyConfig, boolean isExpectedSecured) throws IOException { - ClientConfiguration awsConf = new ClientConfiguration(); - initProxySupport(proxyConfig, "test-bucket", awsConf); - Assertions.assertThat(awsConf.getProxyProtocol()) + ProxyConfiguration config = + AWSClientConfig.createProxyConfiguration(proxyConfig, "testBucket"); + ProxyConfiguration asyncConfig = + AWSClientConfig.createProxyConfiguration(proxyConfig, "testBucket"); + Assertions.assertThat(config.scheme()) .describedAs("Proxy protocol not as expected") - .isEqualTo(isExpectedSecured ? Protocol.HTTPS : Protocol.HTTP); + .isEqualTo(isExpectedSecured ? "https" : "http"); + Assertions.assertThat(asyncConfig.scheme()) + .describedAs("Proxy protocol not as expected") + .isEqualTo(isExpectedSecured ? 
"https" : "http"); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java index a2b013f468a79..9e903fd85ff49 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestWildflyAndOpenSSLBinding.java @@ -20,10 +20,9 @@ import java.io.IOException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; import org.junit.Before; import org.junit.Test; +import software.amazon.awssdk.http.apache.ApacheHttpClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; @@ -74,7 +73,7 @@ public void testUnknownMode() throws Throwable { Configuration conf = new Configuration(false); conf.set(SSL_CHANNEL_MODE, "no-such-mode "); intercept(IllegalArgumentException.class, () -> - bindSSLChannelMode(conf, new ClientConfiguration())); + bindSSLChannelMode(conf, ApacheHttpClient.builder())); } @Test @@ -143,9 +142,7 @@ private DelegatingSSLSocketFactory.SSLChannelMode bindSocketFactory( DelegatingSSLSocketFactory.resetDefaultFactory(); Configuration conf = new Configuration(false); conf.set(SSL_CHANNEL_MODE, channelMode.name()); - ClientConfiguration awsConf = new ClientConfiguration(); - awsConf.setProtocol(Protocol.HTTPS); - bindSSLChannelMode(conf, awsConf); + bindSSLChannelMode(conf, ApacheHttpClient.builder()); return DelegatingSSLSocketFactory.getDefaultFactory().getChannelMode(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 70d91ba7b113f..1c6e00655acb2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -58,7 +58,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.E_FORBIDDEN_AWS_PROVIDER; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.*; import static org.apache.hadoop.fs.s3a.auth.RoleModel.*; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java index a829d470e7a66..cdf89211fd7fc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java @@ -25,12 +25,12 @@ import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; -import com.amazonaws.SignableRequest; -import com.amazonaws.auth.AWS4Signer; -import com.amazonaws.arn.Arn; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.Signer; -import com.amazonaws.services.s3.internal.AWSS3V4Signer; +import software.amazon.awssdk.arns.Arn; +import software.amazon.awssdk.auth.signer.Aws4Signer; +import software.amazon.awssdk.auth.signer.AwsS3V4Signer; +import 
software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; @@ -183,14 +183,15 @@ public CustomSigner() { * request because the signature calculated by the service doesn't match * what we sent. * @param request the request to sign. - * @param credentials credentials used to sign the request. + * @param executionAttributes request executionAttributes which contain the credentials. */ @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest request, + ExecutionAttributes executionAttributes) { int c = INVOCATION_COUNT.incrementAndGet(); LOG.info("Signing request #{}", c); - String host = request.getEndpoint().getHost(); + String host = request.host(); String bucketName = parseBucketFromHost(host); try { lastStoreValue = CustomSignerInitializer @@ -199,19 +200,11 @@ public void sign(SignableRequest request, AWSCredentials credentials) { throw new RuntimeException("Failed to get current Ugi", e); } if (bucketName.equals("kms")) { - AWS4Signer realKMSSigner = new AWS4Signer(); - realKMSSigner.setServiceName("kms"); - if (lastStoreValue != null) { - realKMSSigner.setRegionName(lastStoreValue.conf.get(TEST_REGION_KEY)); - } - realKMSSigner.sign(request, credentials); + Aws4Signer realKMSSigner = Aws4Signer.create(); + return realKMSSigner.sign(request, executionAttributes); } else { - AWSS3V4Signer realSigner = new AWSS3V4Signer(); - realSigner.setServiceName("s3"); - if (lastStoreValue != null) { - realSigner.setRegionName(lastStoreValue.conf.get(TEST_REGION_KEY)); - } - realSigner.sign(request, credentials); + AwsS3V4Signer realSigner = AwsS3V4Signer.create(); + return realSigner.sign(request, executionAttributes); } } @@ -235,11 +228,11 @@ private String parseBucketFromHost(String host) { String accessPointName = bucketName.substring(0, bucketName.length() - (accountId.length() + 1)); Arn arn = Arn.builder() - .withAccountId(accountId) - .withPartition("aws") - .withRegion(hostBits[2]) - .withResource("accesspoint" + "/" + accessPointName) - .withService("s3").build(); + .accountId(accountId) + .partition("aws") + .region(hostBits[2]) + .resource("accesspoint" + "/" + accessPointName) + .service("s3").build(); bucketName = arn.toString(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java index ca87b5c1b34a6..595e2687276b1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestSignerManager.java @@ -19,8 +19,6 @@ import java.io.Closeable; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.security.PrivilegedExceptionAction; import java.util.HashMap; @@ -28,12 +26,10 @@ import java.util.Objects; import java.util.concurrent.TimeUnit; -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.DefaultRequest; -import com.amazonaws.SignableRequest; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.Signer; -import com.amazonaws.auth.SignerFactory; +import software.amazon.awssdk.core.interceptor.ExecutionAttributes; +import 
software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.SdkHttpFullRequest; +import software.amazon.awssdk.http.SdkHttpMethod; import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Rule; @@ -284,7 +280,7 @@ private void attemptSignAndVerify(String identifier, String bucket, throws IOException, InterruptedException { ugi.doAs((PrivilegedExceptionAction) () -> { Signer signer = new SignerForInitializerTest(); - SignableRequest signableRequest = constructSignableRequest(bucket); + SdkHttpFullRequest signableRequest = constructSignableRequest(bucket); signer.sign(signableRequest, null); verifyStoreValueInSigner(expectNullStoreInfo, bucket, identifier); return null; @@ -336,8 +332,10 @@ public static class SignerForTest1 implements Signer { private static boolean initialized = false; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { initialized = true; + return sdkHttpFullRequest; } public static void reset() { @@ -354,8 +352,10 @@ public static class SignerForTest2 implements Signer { private static boolean initialized = false; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { + public SdkHttpFullRequest sign(SdkHttpFullRequest sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { initialized = true; + return sdkHttpFullRequest; } public static void reset() { @@ -472,11 +472,15 @@ public static class SignerForInitializerTest implements Signer { private static StoreValue retrievedStoreValue; @Override - public void sign(SignableRequest request, AWSCredentials credentials) { - String bucketName = request.getEndpoint().getHost(); + public SdkHttpFullRequest sign(SdkHttpFullRequest sdkHttpFullRequest, + ExecutionAttributes executionAttributes) { + String bucket = sdkHttpFullRequest.host().split("//")[1]; + // remove trailing slash + String bucketName = bucket.substring(0, bucket.length() - 1); try { retrievedStoreValue = SignerInitializerForTest .getStoreInfo(bucketName, UserGroupInformation.getCurrentUser()); + return sdkHttpFullRequest; } catch (IOException e) { throw new RuntimeException("Failed to get current ugi", e); } @@ -579,12 +583,9 @@ private String createTokenIdentifierString(String identifier, return identifier + "_" + bucketName + "_" + user; } - private SignableRequest constructSignableRequest(String bucketName) - throws URISyntaxException { - DefaultRequest signableRequest = new DefaultRequest( - AmazonWebServiceRequest.NOOP, "fakeservice"); - URI uri = new URI("s3://" + bucketName + "/"); - signableRequest.setEndpoint(uri); - return signableRequest; + private SdkHttpFullRequest constructSignableRequest(String bucketName) { + String host = "s3://" + bucketName + "/"; + return SdkHttpFullRequest.builder().host(host).protocol("https").method(SdkHttpMethod.GET) + .build(); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java index dd513055b91ac..28784b17c9ce8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java @@ -26,6 +26,7 @@ import 
java.net.URI; import java.nio.file.AccessDeniedException; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.junit.AfterClass; @@ -176,8 +177,7 @@ protected Configuration createConfiguration() { conf.set(YarnConfiguration.RM_PRINCIPAL, YARN_RM); // turn on ACLs so as to verify role DT permissions include // write access. - // TODO: Why do we need this? Can we get rid of ACLs? - // conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); + conf.set(CANNED_ACL, LOG_DELIVERY_WRITE); return conf; } @@ -330,10 +330,9 @@ public void testDelegatedFileSystem() throws Throwable { + " if role restricted, permissions are tightened."); S3AFileSystem fs = getFileSystem(); // force a probe of the remote FS to make sure its endpoint is valid - // TODO: Previously a call to getObjectMetadata for a base path, ie with an empty key would - // return some metadata. (bucket region, content type). headObject() fails without a key, check - // how this can be fixed. - // fs.getObjectMetadata(new Path("/")); + // TODO: Check what should happen here. Calling headObject() on the root path fails in V2, + // with the error that key cannot be empty. + // fs.getObjectMetadata(new Path("/")); readLandsatMetadata(fs); URI uri = fs.getUri(); @@ -588,17 +587,18 @@ protected HeadBucketResponse readLandsatMetadata(final S3AFileSystem delegatedFS URI landsat = new URI(DEFAULT_CSVTEST_FILE); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); - factory.setConf(new Configuration(delegatedFS.getConf())); + Configuration conf = delegatedFS.getConf(); + factory.setConf(conf); String host = landsat.getHost(); S3ClientFactory.S3ClientCreationParameters parameters = null; parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(testingCreds) .withPathUri(new URI("s3a://localhost/")) - .withEndpoint(DEFAULT_ENDPOINT) .withMetrics(new EmptyS3AStatisticsContext() .newStatisticsFromAwsSdk()) - .withUserAgentSuffix("ITestSessionDelegationInFilesystem"); - S3Client s3 = factory.createS3ClientV2(landsat, parameters); + .withUserAgentSuffix("ITestSessionDelegationInFilesystem") + .withRegion(Region.US_WEST_2); + S3Client s3 = factory.createS3Client(landsat, parameters); return Invoker.once("HEAD", host, () -> s3.headBucket(b -> b.bucket(host))); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java index d28f4279f1ba0..fbad671e1fa66 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextStatistics.java @@ -16,7 +16,6 @@ import java.io.IOException; import java.net.URI; -import com.amazonaws.services.s3.model.CryptoStorageMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,7 +76,7 @@ protected void verifyReadBytes(FileSystem.Statistics stats) { *
    * NOTE: if Client side encryption is enabled, expected bytes written * should increase by 16(padding of data) + bytes for the key ID set + 94(KMS - * key generation) in case of storage type{@link CryptoStorageMode} as + * key generation) in case of storage type CryptoStorageMode as * ObjectMetadata(Default). If Crypto Storage mode is instruction file then * add additional bytes as that file is stored separately and would account * for bytes written. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java index b521a81a94942..3a390e34ecad2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestXAttrCost.java @@ -64,9 +64,6 @@ public ITestXAttrCost() { @Test public void testXAttrRoot() throws Throwable { describe("Test xattr on root"); - // TODO: Previously a call to getObjectMetadata for a base path, ie with an empty key would - // return some metadata. (bucket region, content type). headObject() fails without a key, check - // how this can be fixed. Path root = new Path("/"); S3AFileSystem fs = getFileSystem(); Map xAttrs = verifyMetrics( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java index 9d026fd90ee50..81bd8a5efe2e4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestHeaderProcessing.java @@ -27,6 +27,7 @@ import java.util.Map; import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.assertj.core.api.Assertions; import org.assertj.core.util.Lists; @@ -321,6 +322,11 @@ public HeadObjectResponse getObjectMetadata(final String key) } + @Override + public HeadBucketResponse getBucketMetadata() throws IOException { + return HeadBucketResponse.builder().build(); + } + public void setHeader(String key, String val) { userHeaders.put(key, val); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java index 7f51d2b45362c..919a89b8c1dd0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java @@ -18,14 +18,10 @@ package org.apache.hadoop.fs.s3a.impl; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.client.builder.AwsClientBuilder; -import org.junit.Ignore; import org.junit.Test; import org.apache.hadoop.test.AbstractHadoopTestBase; -import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createEndpointConfiguration; import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion; import static org.assertj.core.api.Assertions.assertThat; @@ -63,43 +59,4 @@ private static void assertRegionFixup(String region, String expected) { .describedAs("Fixup of %s", region) .isEqualTo(expected); } - - @Test - public void testNull() throws Throwable { - expectEndpoint("", true, "unused"); - } - - 
@Test - @Ignore("disabled until endpoint logic works for S3 client builder API") - public void testUSEastEndpoint() throws Throwable { - expectEndpoint(US_EAST_1, false, US_EAST_1); - } - - @Test - @Ignore("disabled until endpoint logic works for S3 client builder API") - public void testUSWestEndpoint() throws Throwable { - expectEndpoint(US_WEST_2, false, US_WEST_2); - } - - public void expectEndpoint(final String src, - final boolean expectNull, - final String expectRegion) { - AwsClientBuilder.EndpointConfiguration epr = - createEndpointConfiguration(src, new ClientConfiguration(), src); - String eprStr = epr == null - ? "(empty)" - : ("(" + epr.getServiceEndpoint() + " " + epr.getSigningRegion()); - if (expectNull) { - assertThat(epr) - .describedAs("Endpoint configuration of %s =", - src, eprStr) - .isNull(); - } else { - assertThat(epr) - .describedAs("Endpoint configuration of %s =", - src, eprStr) - .hasFieldOrPropertyWithValue("serviceEndpoint", src) - .hasFieldOrPropertyWithValue("signingRegion", expectRegion); - } - } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index b16204a260b09..1fb576a55514c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -25,7 +25,6 @@ import software.amazon.awssdk.awscore.AwsRequest; import software.amazon.awssdk.core.SdkRequest; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; -import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; @@ -81,7 +80,7 @@ public void testRequestFactoryWithEncryption() throws Throwable { */ @Test public void testRequestFactoryWithCannedACL() throws Throwable { - ObjectCannedACL acl = ObjectCannedACL.BUCKET_OWNER_FULL_CONTROL; + String acl = "bucket-owner-full-control"; RequestFactory factory = RequestFactoryImpl.builder() .withBucket("bucket") .withCannedACL(acl) @@ -92,17 +91,20 @@ public void testRequestFactoryWithCannedACL() throws Throwable { Assertions.assertThat(factory.newPutObjectRequestBuilder(path, null, 128, false) .build() - .acl()) + .acl() + .toString()) .describedAs("ACL of PUT") .isEqualTo(acl); Assertions.assertThat(factory.newCopyObjectRequestBuilder(path, path2, md) .build() - .acl()) + .acl() + .toString()) .describedAs("ACL of COPY") .isEqualTo(acl); Assertions.assertThat(factory.newMultipartUploadRequestBuilder(path, null) .build() - .acl()) + .acl() + .toString()) .describedAs("ACL of MPU") .isEqualTo(acl); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index 9555e8316380c..de0048c25581c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -214,7 +214,7 @@ protected Configuration createConfiguration() { public void setup() throws Exception { super.setup(); S3AFileSystem fs = getFileSystem(); - s3client = fs.getAmazonS3V2ClientForTesting("markers"); + s3client = fs.getAmazonS3ClientForTesting("markers"); bucket = 
fs.getBucket(); Path base = new Path(methodPath(), "base"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index 200b1fc282bac..1a30c04358bac 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -26,9 +26,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.IntFunction; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; import org.assertj.core.api.Assertions; import org.junit.FixMethodOrder; import org.junit.Test; @@ -48,6 +45,8 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.impl.ProgressListener; +import org.apache.hadoop.fs.s3a.impl.ProgressListenerEvent; import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.util.DurationInfo; @@ -377,10 +376,9 @@ protected int getPartitionSize() { } /** - * Progress callback from AWS. Likely to come in on a different thread. + * Progress callback. */ - private final class ProgressCallback implements Progressable, - ProgressListener { + private final class ProgressCallback implements Progressable, ProgressListener { private AtomicLong bytesTransferred = new AtomicLong(0); private AtomicLong uploadEvents = new AtomicLong(0); private AtomicInteger failures = new AtomicInteger(0); @@ -395,11 +393,8 @@ public void progress() { } @Override - public void progressChanged(ProgressEvent progressEvent) { - ProgressEventType eventType = progressEvent.getEventType(); - if (eventType.isByteCountEvent()) { - bytesTransferred.addAndGet(progressEvent.getBytesTransferred()); - } + public void progressChanged(ProgressListenerEvent eventType, long transferredBytes) { + switch (eventType) { case TRANSFER_PART_FAILED_EVENT: // failure @@ -408,6 +403,7 @@ public void progressChanged(ProgressEvent progressEvent) { break; case TRANSFER_PART_COMPLETED_EVENT: // completion + bytesTransferred.addAndGet(transferredBytes); long elapsedTime = timer.elapsedTime(); double elapsedTimeS = elapsedTime / 1.0e9; long written = bytesTransferred.get(); @@ -415,21 +411,18 @@ public void progressChanged(ProgressEvent progressEvent) { LOG.info(String.format( "Event %s; total uploaded=%d MB in %.1fs;" + " effective upload bandwidth = %.2f MB/s", - progressEvent, + eventType, writtenMB, elapsedTimeS, writtenMB / elapsedTimeS)); break; case REQUEST_BYTE_TRANSFER_EVENT: uploadEvents.incrementAndGet(); break; default: - if (!eventType.isByteCountEvent()) { - LOG.info("Event {}", progressEvent); - } + // nothing break; } } - @Override public String toString() { String sb = "ProgressCallback{" + "bytesTransferred=" + bytesTransferred.get() + diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java index db9093ea18414..173099bb2ca71 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ADirectoryPerformance.java @@ -258,7 +258,7 @@ public void testMultiPagesListingPerformanceAndCorrectness() originalListOfFiles.add(file.toString()); PutObjectRequest.Builder putObjectRequestBuilder = requestFactory .newPutObjectRequestBuilder(fs.pathToKey(file), - null, 128, false); + null, 0, false); futures.add(submit(executorService, () -> writeOperationHelper.putObject(putObjectRequestBuilder.build(), PutObjectOptions.keepingDirs(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java index 51da971fb7063..4d4af822ee50b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java @@ -415,7 +415,7 @@ public void testSelectSeekFullLandsat() throws Throwable { long increment = 64 * _1KB; // seek forward, comparing bytes - for(offset = 32 * _1KB; offset < _1MB; offset += increment) { + for(offset = 32 * _1KB; offset < 256 * _1KB; offset += increment) { seek(seekStream, offset); assertEquals("Seek position in " + seekStream, offset, seekStream.getPos()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java index 127fcbab75023..ab22c51f28b7b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java @@ -224,13 +224,6 @@ public void testRunNoArgs() throws Throwable { runToFailure(EXIT_USAGE, MARKERS); } - @Test - public void testRunWrongBucket() throws Throwable { - runToFailure(EXIT_NOT_FOUND, MARKERS, - AUDIT, - "s3a://this-bucket-does-not-exist-hopefully"); - } - /** * Run with a path that doesn't exist. */ diff --git a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker deleted file mode 100644 index 3b308f19255c3..0000000000000 --- a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +++ /dev/null @@ -1,13 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-mock-maker-inline
\ No newline at end of file

From 75220b773873ed8919eace3bacfcf351b0110828 Mon Sep 17 00:00:00 2001
From: Ahmar Suhail
Date: Wed, 17 May 2023 13:35:28 +0100
Subject: [PATCH 12/13] fixes issues after rebase

---
 .../fs/s3a/audit/impl/LoggingAuditor.java     | 30 +++++++++++--------
 .../fs/s3a/audit/AbstractAuditingTest.java    | 27 ++++++++++++++---
 .../audit/TestHttpReferrerAuditHeader.java    | 12 +++++---
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java
index 602e0ef8ecb5b..3a2d9d7f823ee 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/LoggingAuditor.java
@@ -29,6 +29,8 @@
 import software.amazon.awssdk.core.interceptor.Context;
 import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
 import software.amazon.awssdk.http.SdkHttpRequest;
+import software.amazon.awssdk.services.s3.model.DeleteObjectRequest;
+import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -260,7 +262,8 @@ private class LoggingAuditSpan extends AbstractAuditSpanImpl {
 
     /**
      * Attach Range of data for GetObject Request.
-     * @param request given get object request
+     * @param request the sdk request to be modified
+     * @param executionAttributes execution attributes for this request
      */
     private void attachRangeFromRequest(SdkHttpRequest request,
         ExecutionAttributes executionAttributes) {
@@ -370,12 +373,13 @@ public void set(final String key, final String value) {
     public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
         ExecutionAttributes executionAttributes) {
       SdkHttpRequest httpRequest = context.httpRequest();
+      SdkRequest sdkRequest = context.request();
 
-    // attach range for GetObject requests
-    attachRangeFromRequest(httpRequest, executionAttributes);
+      // attach range for GetObject requests
+      attachRangeFromRequest(httpRequest, executionAttributes);
 
-    // for delete op, attach the number of files to delete
-    attachDeleteKeySizeAttribute(request);
+      // for delete op, attach the number of files to delete
+      attachDeleteKeySizeAttribute(sdkRequest);
 
       // build the referrer header
       final String header = referrer.buildHttpReferrer();
@@ -397,9 +401,9 @@ public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
       }
 
       // now see if the request is actually a blocked multipart request
-      if (!isMultipartUploadEnabled && isRequestMultipartIO(httpRequest)) {
+      if (!isMultipartUploadEnabled && isRequestMultipartIO(sdkRequest)) {
         throw new AuditOperationRejectedException("Multipart IO request "
-            + httpRequest + " rejected " + header);
+            + sdkRequest + " rejected " + header);
       }
 
       return httpRequest;
@@ -409,16 +413,16 @@ public SdkHttpRequest modifyHttpRequest(Context.ModifyHttpRequest context,
     /**
      * For delete requests, attach delete key size as a referrer attribute.
      *
     * @param request the request object.
-     * @param <T> type of the request.
     */
-    private <T> void attachDeleteKeySizeAttribute(T request) {
+    private void attachDeleteKeySizeAttribute(SdkRequest request) {
+
       if (request instanceof DeleteObjectsRequest) {
-        int keySize = ((DeleteObjectsRequest) request).getKeys().size();
-        this.set(DELETE_KEYS_SIZE, String.valueOf(keySize));
+        int keySize = ((DeleteObjectsRequest) request).delete().objects().size();
+        referrer.set(DELETE_KEYS_SIZE, String.valueOf(keySize));
       } else if (request instanceof DeleteObjectRequest) {
-        String key = ((DeleteObjectRequest) request).getKey();
+        String key = ((DeleteObjectRequest) request).key();
         if (key != null && key.length() > 0) {
-          this.set(DELETE_KEYS_SIZE, "1");
+          referrer.set(DELETE_KEYS_SIZE, "1");
         }
       }
     }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java
index 0f6421d1bc4e2..5c33f19270ebb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java
@@ -34,8 +34,11 @@
 import software.amazon.awssdk.core.interceptor.InterceptorContext;
 import software.amazon.awssdk.http.SdkHttpMethod;
 import software.amazon.awssdk.http.SdkHttpRequest;
+import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
 import software.amazon.awssdk.services.s3.model.GetObjectRequest;
 import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
+import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
+
 import org.junit.After;
 import org.junit.Before;
 import org.slf4j.Logger;
@@ -288,15 +291,31 @@ protected void assertMapNotContains(final Map<String, String> params, final Stri
    * @param keys keys to be provided in the bulk delete request.
    * @return a processed request.
    */
-  protected DeleteObjectsRequest headForBulkDelete(String... keys) {
+  protected SdkHttpRequest headForBulkDelete(String... keys) {
     if (keys == null || keys.length == 0) {
       return null;
     }
-    List<DeleteObjectsRequest.KeyVersion> keysToDelete = Arrays
+
+    List<ObjectIdentifier> keysToDelete = Arrays
         .stream(keys)
-        .map(DeleteObjectsRequest.KeyVersion::new)
+        .map(key -> ObjectIdentifier.builder().key(key).build())
         .collect(Collectors.toList());
-    return manager.beforeExecution(requestFactory.newBulkDeleteRequest(keysToDelete));
+
+    ExecutionAttributes executionAttributes = ExecutionAttributes.builder().build();
+
+    SdkHttpRequest.Builder httpRequestBuilder =
+        SdkHttpRequest.builder().uri(URI.create("https://test")).method(SdkHttpMethod.POST);
+
+    DeleteObjectsRequest deleteObjectsRequest =
+        requestFactory.newBulkDeleteRequestBuilder(keysToDelete).build();
+
+    InterceptorContext context = InterceptorContext.builder()
+        .request(deleteObjectsRequest)
+        .httpRequest(httpRequestBuilder.build())
+        .build();
+
+    manager.beforeExecution(context, executionAttributes);
+    return manager.modifyHttpRequest(context, executionAttributes);
   }
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java
index 430e1370c0397..7f8dd043261b2 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java
@@ -350,16 +350,20 @@ public void testGetObjectWithoutRange() throws Throwable {
   public void testHttpReferrerForBulkDelete() throws Throwable {
     AuditSpan span = span();
     long ts = span.getTimestamp();
-    DeleteObjectsRequest request = headForBulkDelete(
+    SdkHttpRequest request = headForBulkDelete(
         "key_01",
         "key_02",
         "key_03");
-    Map<String, String> headers
-        = request.getCustomRequestHeaders();
+    Map<String, List<String>> headers
+        = request.headers();
     assertThat(headers)
         .describedAs("Custom headers")
         .containsKey(HEADER_REFERRER);
-    String header = headers.get(HEADER_REFERRER);
+    List<String> headerValues = headers.get(HEADER_REFERRER);
+    assertThat(headerValues)
+        .describedAs("Multiple referrer headers")
+        .hasSize(1);
+    String header = headerValues.get(0);
     LOG.info("Header is {}", header);
     Map<String, String> params
         = HttpReferrerAuditHeader.extractQueryParameters(header);

From 7978e7056500794402e94e42e36e64ff0e138c5c Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Wed, 17 May 2023 15:26:18 +0100
Subject: [PATCH 13/13] HADOOP-18742. AWS v2 SDK: stabilise dependencies with rest of hadoop libraries

* explicit jackson declaration in hadoop-aws
* cut jackson, eventstream and ion from aws SDK dependencies

Change-Id: I47f6e10d42c8067df8255eca69799469d7252480
---
 hadoop-project/pom.xml          | 14 ++++++++++++++
 hadoop-tools/hadoop-aws/pom.xml |  4 ++++
 2 files changed, 18 insertions(+)

diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index cf7852bad7d70..a228d10f880b7 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -1133,6 +1133,12 @@
         <groupId>com.amazonaws</groupId>
         <artifactId>aws-java-sdk-core</artifactId>
         <version>${aws-java-sdk.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>software.amazon.ion</groupId>
+            <artifactId>ion-java</artifactId>
+          </exclusion>
+        </exclusions>
       </dependency>
       <dependency>
         <groupId>software.amazon.awssdk</groupId>
@@ -1143,6 +1149,14 @@
           <exclusion>
             <groupId>io.netty</groupId>
             <artifactId>*</artifactId>
           </exclusion>
+          <exclusion>
+            <groupId>software.amazon.eventstream</groupId>
+            <artifactId>eventstream</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>com.fasterxml.jackson.dataformat</groupId>
+            <artifactId>jackson-dataformat-cbor</artifactId>
+          </exclusion>
         </exclusions>
       </dependency>
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 0731df4daf089..cd7ffb0b47afe 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -509,6 +509,10 @@
       <artifactId>aws-crt</artifactId>
       <scope>compile</scope>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.dataformat</groupId>
+      <artifactId>jackson-dataformat-cbor</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.assertj</groupId>
       <artifactId>assertj-core</artifactId>
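
Note for reviewers following the v1 to v2 migration: the tests in this series repeatedly use the v2 SDK's ExecutionInterceptor extension point (RegionInterceptor, the audit interceptors) in place of the v1 RequestHandler2 callbacks. The following is a minimal, self-contained sketch of that pattern, not part of the patch; the class name, region, and use of anonymous credentials are illustrative assumptions only.

import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;

public final class InterceptorSketch {

  public static void main(String[] args) {
    // Interceptor which fails every call before any network IO happens,
    // mirroring how RegionInterceptor asserts on execution attributes
    // and then aborts the request with a service exception.
    ExecutionInterceptor failFast = new ExecutionInterceptor() {
      @Override
      public void beforeExecution(Context.BeforeExecution context,
          ExecutionAttributes executionAttributes) {
        // beforeExecution() runs before signing and before the HTTP call,
        // so no real credentials or network access are needed.
        throw AwsServiceException.builder()
            .message("Exception thrown by interceptor")
            .build();
      }
    };

    S3Client s3 = S3Client.builder()
        .region(Region.US_EAST_1)
        .credentialsProvider(AnonymousCredentialsProvider.create())
        .overrideConfiguration(o -> o.addExecutionInterceptor(failFast))
        .build();

    try {
      s3.listBuckets();
    } catch (AwsServiceException expected) {
      System.out.println(expected.getMessage()); // "Exception thrown by interceptor"
    }
  }
}

In the S3A production path the interceptors are not registered on the client builder directly; they are passed through S3ClientFactory.S3ClientCreationParameters.withExecutionInterceptors(), as the createS3Client() test helper earlier in this patch shows.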