From 281f4c9d8e6867a5864c1f2902e277417295a0f8 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 17 Aug 2023 14:29:26 +0100 Subject: [PATCH 1/4] upgrades sdk v2 version --- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 809084cb3f7da..9eddea0f9eb99 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -184,7 +184,7 @@ 900 1.12.367 2.7.1 - 2.19.12 + 2.20.128 1.0.1 0.21.0 1.11.2 From f126201e8f17399840447f8427b97484c6e8c0f5 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Fri, 18 Aug 2023 10:56:10 +0100 Subject: [PATCH 2/4] configure multipart in java async client --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 11 ++++++++- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 1 + .../apache/hadoop/fs/s3a/S3ClientFactory.java | 24 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 1b2c129a6428a..98c72d276628e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -37,6 +37,7 @@ import software.amazon.awssdk.services.s3.S3BaseClientBuilder; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.commons.lang3.StringUtils; @@ -98,17 +99,25 @@ public S3AsyncClient createS3AsyncClient( Configuration conf = getConf(); String bucket = uri.getHost(); + NettyNioAsyncHttpClient.Builder httpClientBuilder = AWSClientConfig .createAsyncHttpClientBuilder(conf) 
.proxyConfiguration(AWSClientConfig.createAsyncProxyConfiguration(conf, bucket)); + + MultipartConfiguration multipartConfiguration = MultipartConfiguration.builder() + .minimumPartSizeInBytes(parameters.getMinimumPartSize()) + .thresholdInBytes(parameters.getMultiPartThreshold()) + .build(); + return configureClientBuilder(S3AsyncClient.builder(), parameters, conf, bucket) .httpClientBuilder(httpClientBuilder) + .multipartConfiguration(multipartConfiguration) + .multipartEnabled(true) .build(); } @Override public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) { - return S3TransferManager.builder() .s3Client(s3AsyncClient) .build(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 305695f2ef5ac..43f0b9c98cdd6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -982,6 +982,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withRequesterPays(conf.getBoolean(ALLOW_REQUESTER_PAYS, DEFAULT_ALLOW_REQUESTER_PAYS)) .withExecutionInterceptors(auditManager.createExecutionInterceptors()) .withMinimumPartSize(partSize) + .withMultipartThreshold(multiPartThreshold) .withTransferManagerExecutor(unboundedThreadPool) .withRegion(region); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index fa2c0769d26a3..d4504cd08d74c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -151,6 +151,11 @@ final class S3ClientCreationParameters { */ private long minimumPartSize; + /** + * Threshold for 
multipart operations. + */ + private long multiPartThreshold; + /** * Executor that the transfer manager will use to execute background tasks. */ @@ -337,6 +342,25 @@ public S3ClientCreationParameters withMinimumPartSize( return this; } + /** + * Get the size threshold, in bytes, for multipart operations. + * @return multipart threshold in bytes + */ + public long getMultiPartThreshold() { + return multiPartThreshold; + } + + /** + * Set the size threshold, in bytes, for multipart operations. + * @param value new threshold in bytes + * @return the builder + */ + public S3ClientCreationParameters withMultipartThreshold( + final long value) { + multiPartThreshold = value; + return this; + } + /** * Get the executor that the transfer manager will use to execute background tasks. * @return part size From 54dcf48d4e2918ceb485f74dce644714669b204e Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Fri, 18 Aug 2023 11:15:37 +0100 Subject: [PATCH 3/4] removes crt dependency --- hadoop-project/pom.xml | 6 ------ hadoop-tools/hadoop-aws/pom.xml | 4 ---- 2 files changed, 10 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 9eddea0f9eb99..871da7a960868 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -186,7 +186,6 @@ 2.7.1 2.20.128 1.0.1 - 0.21.0 1.11.2 2.1 0.7 @@ -1156,11 +1155,6 @@ eventstream ${aws.evenstream.version} - - software.amazon.awssdk.crt - aws-crt - ${awscrt.version} - org.apache.mina mina-core diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 3591ab4ea5a50..66154e2840e34 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -518,10 +518,6 @@ bundle compile - - software.amazon.awssdk.crt - aws-crt - software.amazon.eventstream eventstream From 9706ea8c494b283d47400848af35c7e7994ff09b Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Tue, 22 Aug 2023 19:25:09 +0100 Subject: [PATCH 4/4] updates License-binary, fixes eventstream typo --- LICENSE-binary | 3 +-- hadoop-project/pom.xml | 4 ++-- 2 files
changed, 3 insertions(+), 4 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 851d839cdbd8d..0c8199da1be15 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -363,8 +363,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.1 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final -software.amazon.awssdk:bundle:jar:2.19.12 -software.amazon.awssdk.crt:aws-crt:0.21.0 +software.amazon.awssdk:bundle:jar:2.20.128 -------------------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 871da7a960868..47fa59cc67161 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -185,7 +185,7 @@ 1.12.367 2.7.1 2.20.128 - 1.0.1 + 1.0.1 1.11.2 2.1 0.7 @@ -1153,7 +1153,7 @@ software.amazon.eventstream eventstream - ${aws.evenstream.version} + ${aws.eventstream.version} org.apache.mina