From a8a7fa4804ce363b7d523bde5a4a410ddc2032cd Mon Sep 17 00:00:00 2001 From: PradhanPrerak39 Date: Tue, 13 Feb 2024 18:47:14 -0800 Subject: [PATCH 01/14] [SPARK-38958]: Override S3 Client in Spark Write/Read calls --- .../org/apache/hadoop/fs/s3a/Constants.java | 17 +++++++ .../hadoop/fs/s3a/impl/AWSClientConfig.java | 48 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 6c0efa6e5c309..c0756f381630c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -852,6 +852,23 @@ private Constants() { "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; + /** + * List of custom headers to be set on the service client. + * Multiple parameters can be used to specify custom headers. + * fs.s3a.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.sts.custom.headers - headers to add on all the sts requests. + * Examples + * CustomHeader {@literal ->} 'Header1:Value1' + * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' + */ + public static final String CUSTOM_HEADERS_STS = + "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + + ".custom.headers"; + + public static final String CUSTOM_HEADERS_S3 = + "fs.s3a." 
+ Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + + ".custom.headers"; + @Deprecated public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 4ea43e7a66eef..47407d517a99d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -22,6 +22,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.time.Duration; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; @@ -76,6 +78,8 @@ import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_STS; import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_S3; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_STS; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.enforceMinimumDuration; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.getDuration; import static org.apache.hadoop.util.Preconditions.checkArgument; @@ -120,6 +124,8 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf initUserAgent(conf, overrideConfigBuilder); + initRequestHeaders(conf, overrideConfigBuilder, awsServiceIdentifier); + String signer = conf.getTrimmed(SIGNING_ALGORITHM, ""); if (!signer.isEmpty()) { LOG.debug("Signer override = {}", signer); @@ -412,6 +418,48 @@ private static void initSigner(Configuration conf, } } + /** + * + * @param conf hadoop configuration + * @param clientConfig client configuration to update + * @param 
awsServiceIdentifier service name + */ + private static void initRequestHeaders(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) { + String configKey = null; + switch (awsServiceIdentifier) { + case AWS_SERVICE_IDENTIFIER_S3: + configKey = CUSTOM_HEADERS_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = CUSTOM_HEADERS_STS; + break; + default: + // Nothing to do. The original signer override is already setup + } + if (configKey != null) { + String[] customHeaders = conf.getTrimmedStrings(configKey); + if (customHeaders == null || customHeaders.length == 0) { + LOG.debug("No custom headers specified"); + return; + } + + for (String customHeader : customHeaders) { + String[] parts = customHeader.split("="); + if (parts.length != 2) { + String message = "Invalid format (Expected header1=value1:value2,header2=value1) for Header: [" + + customHeader + + "]"; + LOG.error(message); + throw new IllegalArgumentException(message); + } + + List values = Arrays.asList(parts[1].split(":")); + clientConfig.putHeader(parts[0], values); + } + } + } + /** * Configures request timeout in the client configuration. 
* This is independent of the timeouts set in the sync and async HTTP clients; From 595b2a18935291361c1c3f9d32dd41266d0887f5 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Mon, 17 Jun 2024 13:38:48 -0700 Subject: [PATCH 02/14] address comments --- .../org/apache/hadoop/fs/s3a/Constants.java | 8 +-- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 26 +++----- .../fs/s3a/impl/TestAwsClientConfig.java | 59 +++++++++++++++++++ 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c0756f381630c..4df15ce803890 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -855,18 +855,18 @@ private Constants() { /** * List of custom headers to be set on the service client. * Multiple parameters can be used to specify custom headers. - * fs.s3a.s3.custom.headers - headers to add on all the s3 requests. - * fs.s3a.sts.custom.headers - headers to add on all the sts requests. + * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. * Examples * CustomHeader {@literal ->} 'Header1:Value1' * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' */ public static final String CUSTOM_HEADERS_STS = - "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".custom.headers"; public static final String CUSTOM_HEADERS_S3 = - "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + "fs.s3a.client." 
+ Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + ".custom.headers"; @Deprecated diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 47407d517a99d..8816b87ba1999 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; @@ -438,25 +439,12 @@ private static void initRequestHeaders(Configuration conf, // Nothing to do. The original signer override is already setup } if (configKey != null) { - String[] customHeaders = conf.getTrimmedStrings(configKey); - if (customHeaders == null || customHeaders.length == 0) { - LOG.debug("No custom headers specified"); - return; - } - - for (String customHeader : customHeaders) { - String[] parts = customHeader.split("="); - if (parts.length != 2) { - String message = "Invalid format (Expected header1=value1:value2,header2=value1) for Header: [" - + customHeader - + "]"; - LOG.error(message); - throw new IllegalArgumentException(message); - } - - List values = Arrays.asList(parts[1].split(":")); - clientConfig.putHeader(parts[0], values); - } + Map awsClientCustomHeadersMap = + S3AUtils.getTrimmedStringCollectionSplitByEquals(conf, configKey); + awsClientCustomHeadersMap.forEach((header, valueString) -> { + List headerValues = Arrays.asList(valueString.split(":")); + clientConfig.putHeader(header, headerValues); + }); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index eacff90ea4c8a..c5925c49f1137 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -18,9 +18,11 @@ package org.apache.hadoop.fs.s3a.impl; +import java.io.IOException; import java.time.Duration; import java.util.Arrays; +import org.apache.hadoop.util.Lists; import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Test; @@ -30,10 +32,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.AbstractHadoopTestBase; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_ACQUISITION_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_IDLE_TIME; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_KEEPALIVE; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_TTL; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_S3; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_STS; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_ACQUISITION_TIMEOUT_DURATION; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_IDLE_TIME_DURATION; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_KEEPALIVE; @@ -48,6 +54,7 @@ import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createApiConnectionSettings; +import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createClientConfigBuilder; import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createConnectionSettings; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.enforceMinimumDuration; @@ -201,4 +208,56 @@ public void testCreateApiConnectionSettingsDefault() { 
private void setOptionsToValue(String value, Configuration conf, String... keys) { Arrays.stream(keys).forEach(key -> conf.set(key, value)); } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_STS} is set, + * verify that returned client configuration has desired headers set. + */ + @Test + public void testInitRequestHeadersForSTS() throws IOException { + final Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_STS, "foo=bar:baz,qux=quux"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_S3)) + .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) + .isNull(); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("foo")) + .describedAs("STS client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("qux")) + .describedAs("STS client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); + } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_S3} is set, + * verify that returned client configuration has desired headers set. 
+ */ + @Test + public void testInitRequestHeadersForS3() throws IOException { + final Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_S3, "foo=bar:baz,qux=quux"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) + .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) + .isNull(); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("foo")) + .describedAs("S3 client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("qux")) + .describedAs("S3 client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); + } } From bb84464116a94eec4b170955b5654891bfe6ac60 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 12:20:19 -0700 Subject: [PATCH 03/14] address comments --- .../java/org/apache/hadoop/fs/s3a/Constants.java | 14 ++++++++++---- .../apache/hadoop/fs/s3a/impl/AWSClientConfig.java | 1 + .../hadoop/fs/s3a/impl/TestAwsClientConfig.java | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 4df15ce803890..11d44c655d5db 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -852,6 +852,12 @@ private Constants() { "fs.s3a." 
+ Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; + /** Prefix for S3A client-specific properties. */ + public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; + + /** Custom headers postfix */ + public static final String CUSTOM_HEADER_POSTFIX = ".custom.headers"; + /** * List of custom headers to be set on the service client. * Multiple parameters can be used to specify custom headers. @@ -862,12 +868,12 @@ private Constants() { * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' */ public static final String CUSTOM_HEADERS_STS = - "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + ".custom.headers"; + FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + + CUSTOM_HEADER_POSTFIX; public static final String CUSTOM_HEADERS_S3 = - "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + ".custom.headers"; + FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + + CUSTOM_HEADER_POSTFIX; @Deprecated public static final String S3N_FOLDER_SUFFIX = "_$folder$"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 8816b87ba1999..3d9dd363887fc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -445,6 +445,7 @@ private static void initRequestHeaders(Configuration conf, List headerValues = Arrays.asList(valueString.split(":")); clientConfig.putHeader(header, headerValues); }); + LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index c5925c49f1137..ed626b7806515 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -22,7 +22,6 @@ import java.time.Duration; import java.util.Arrays; -import org.apache.hadoop.util.Lists; import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Test; @@ -31,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.Lists; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; From 3e1252990744995aad949bbe68fad59307834431 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 15:44:38 -0700 Subject: [PATCH 04/14] renames variable --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 11d44c655d5db..a934a31ea475b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -856,7 +856,7 @@ private Constants() { public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; /** Custom headers postfix */ - public static final String CUSTOM_HEADER_POSTFIX = ".custom.headers"; + public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; /** * List of custom headers to be set on the service client. 
@@ -869,11 +869,11 @@ private Constants() { */ public static final String CUSTOM_HEADERS_STS = FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + CUSTOM_HEADER_POSTFIX; + + CUSTOM_HEADERS_POSTFIX; public static final String CUSTOM_HEADERS_S3 = FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + CUSTOM_HEADER_POSTFIX; + + CUSTOM_HEADERS_POSTFIX; @Deprecated public static final String S3N_FOLDER_SUFFIX = "_$folder$"; From fb2d15c339575a56fb5f958b38c9c677ca6def56 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 16:29:38 -0700 Subject: [PATCH 05/14] retrigger checks From cd9d28ac368abac5dd0bca7bdd888d120485d93b Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Wed, 3 Jul 2024 14:12:59 -0700 Subject: [PATCH 06/14] check style fixes --- .../org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 16 +++--- .../fs/s3a/impl/TestAwsClientConfig.java | 56 +++++++++++-------- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index a934a31ea475b..19743db585cc9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -855,7 +855,7 @@ private Constants() { /** Prefix for S3A client-specific properties. */ public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; - /** Custom headers postfix */ + /** Custom headers postfix. 
*/ public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 3d9dd363887fc..2f2fbdc3183b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -429,14 +429,14 @@ private static void initRequestHeaders(Configuration conf, ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) { String configKey = null; switch (awsServiceIdentifier) { - case AWS_SERVICE_IDENTIFIER_S3: - configKey = CUSTOM_HEADERS_S3; - break; - case AWS_SERVICE_IDENTIFIER_STS: - configKey = CUSTOM_HEADERS_STS; - break; - default: - // Nothing to do. The original signer override is already setup + case AWS_SERVICE_IDENTIFIER_S3: + configKey = CUSTOM_HEADERS_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = CUSTOM_HEADERS_STS; + break; + default: + // Nothing to do. 
The original signer override is already setup } if (configKey != null) { Map awsClientCustomHeadersMap = diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index ed626b7806515..20c95be6926a2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -221,18 +221,22 @@ public void testInitRequestHeadersForSTS() throws IOException { .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) .isNull(); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) - .describedAs("Count of S3 client headers") - .isEqualTo(0); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) - .describedAs("Count of STS client headers") - .isEqualTo(2); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("foo")) - .describedAs("STS client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("qux")) - .describedAs("STS client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().get("foo")) + .describedAs("STS client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + 
Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().get("qux")) + .describedAs("STS client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); } /** @@ -247,17 +251,21 @@ public void testInitRequestHeadersForS3() throws IOException { .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) .isNull(); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) - .describedAs("Count of STS client headers") - .isEqualTo(0); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) - .describedAs("Count of S3 client headers") - .isEqualTo(2); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("foo")) - .describedAs("S3 client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("qux")) - .describedAs("S3 client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("foo")) + .describedAs("S3 client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("qux")) + .describedAs("S3 client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); } } From aecf3575a37874e2baa7c4873dec366f620727ea Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Wed, 14 Aug 2024 15:52:10 -0700 Subject: [PATCH 07/14] change delimiter to 
semicolon (;) and adress comments --- .../org/apache/hadoop/fs/s3a/Constants.java | 28 +++++++++++++++++++ .../hadoop/fs/s3a/impl/AWSClientConfig.java | 6 ++-- .../fs/s3a/impl/TestAwsClientConfig.java | 4 +-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 19743db585cc9..d060020bebbbf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -25,6 +25,7 @@ import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import java.time.Duration; +import java.util.Locale; import java.util.concurrent.TimeUnit; import static org.apache.hadoop.io.Sizes.S_128K; @@ -1362,6 +1363,33 @@ private Constants() { public static final String AWS_SERVICE_IDENTIFIER_DDB = "DDB"; public static final String AWS_SERVICE_IDENTIFIER_STS = "STS"; + /** Prefix for S3A client-specific properties. + * value: {@value} + */ + public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; + + /** Custom headers postfix. + * value: {@value} + */ + public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; + + /** + * List of custom headers to be set on the service client. + * Multiple parameters can be used to specify custom headers. + * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. 
+ * Examples + * CustomHeader {@literal ->} 'Header1:Value1' + * CustomHeaders {@literal ->} 'Header1=Value1;Value2,Header2=Value1' + */ + public static final String CUSTOM_HEADERS_STS = + FS_S3A_CLIENT_PREFIX + AWS_SERVICE_IDENTIFIER_STS.toLowerCase(Locale.ROOT) + + CUSTOM_HEADERS_POSTFIX; + + public static final String CUSTOM_HEADERS_S3 = + FS_S3A_CLIENT_PREFIX + AWS_SERVICE_IDENTIFIER_S3.toLowerCase(Locale.ROOT) + + CUSTOM_HEADERS_POSTFIX; + /** * How long to wait for the thread pool to terminate when cleaning up. * Value: {@value} seconds. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 2f2fbdc3183b2..1ea589be7f47f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -420,7 +420,7 @@ private static void initSigner(Configuration conf, } /** - * + * Initialize custom request headers for AWS clients. * @param conf hadoop configuration * @param clientConfig client configuration to update * @param awsServiceIdentifier service name @@ -436,13 +436,13 @@ private static void initRequestHeaders(Configuration conf, configKey = CUSTOM_HEADERS_STS; break; default: - // Nothing to do. The original signer override is already setup + // No known service. 
} if (configKey != null) { Map awsClientCustomHeadersMap = S3AUtils.getTrimmedStringCollectionSplitByEquals(conf, configKey); awsClientCustomHeadersMap.forEach((header, valueString) -> { - List headerValues = Arrays.asList(valueString.split(":")); + List headerValues = Arrays.asList(valueString.split(";")); clientConfig.putHeader(header, headerValues); }); LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index 20c95be6926a2..859544f0878d1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -216,7 +216,7 @@ private void setOptionsToValue(String value, Configuration conf, String... keys) @Test public void testInitRequestHeadersForSTS() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_STS, "foo=bar:baz,qux=quux"); + conf.set(CUSTOM_HEADERS_STS, "foo=bar;baz,qux=quux"); Assertions.assertThat(conf.get(CUSTOM_HEADERS_S3)) .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) .isNull(); @@ -246,7 +246,7 @@ public void testInitRequestHeadersForSTS() throws IOException { @Test public void testInitRequestHeadersForS3() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_S3, "foo=bar:baz,qux=quux"); + conf.set(CUSTOM_HEADERS_S3, "foo=bar;baz,qux=quux"); Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) .isNull(); From 52d09ae7870a22916dbb3a29b11923caa5f83200 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Tue, 11 Feb 2025 10:14:33 -0800 Subject: [PATCH 08/14] Address review comments: Update Javadoc and improve 
documentation for custom headers. --- .../org/apache/hadoop/fs/s3a/Constants.java | 14 ++++++---- .../site/markdown/tools/hadoop-aws/index.md | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index d060020bebbbf..20ef6c6a457c5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1376,11 +1376,15 @@ private Constants() { /** * List of custom headers to be set on the service client. * Multiple parameters can be used to specify custom headers. - * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. - * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. - * Examples - * CustomHeader {@literal ->} 'Header1:Value1' - * CustomHeaders {@literal ->} 'Header1=Value1;Value2,Header2=Value1' + *
+   * Usage:
+   * fs.s3a.client.s3.custom.headers - Headers to add on all the S3 requests.
+   * fs.s3a.client.sts.custom.headers - Headers to add on all the STS requests.
+   *
+   * Examples:
+   * CustomHeader -> 'Header1:Value1'
+   * CustomHeaders -> 'Header1=Value1;Value2,Header2=Value1'
+   * 
*/ public static final String CUSTOM_HEADERS_STS = FS_S3A_CLIENT_PREFIX + AWS_SERVICE_IDENTIFIER_STS.toLowerCase(Locale.ROOT) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 79bcf55f92953..b7f70021364e2 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -947,6 +947,32 @@ The switch to turn S3A auditing on or off. ``` + +### Configuring Custom Headers for AWS Service Clients + +The S3A client allows users to set custom headers for specific AWS services, such as S3 and STS. +This feature allows you to set specific headers for S3 and STS service clients independently. + +**Configuration Properties:** +- `fs.s3a.client.s3.custom.headers`: Custom headers for S3 service requests. +- `fs.s3a.client.sts.custom.headers`: Sets custom headers for all requests to AWS STS. + +**Header Format:** +Custom headers should be specified as key-value pairs, separated by `=`. Multiple values for a single header can be separated by `;`. Multiple headers can be separated by `,`. 
+ + +```xml +<property> + <name>fs.s3a.client.s3.custom.headers</name> + <value>Header1=Value1</value> +</property> + +<property> +<name>fs.s3a.client.sts.custom.headers</name> +<value>Header1=Value1;Value2,Header2=Value1</value> +</property> +``` + ## Retry and Recovery The S3A client makes a best-effort attempt at recovering from network failures; From be1ea51acd3b7157af9c06a0cfc9f3b1bc029b58 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Thu, 27 Feb 2025 10:23:27 -0800 Subject: [PATCH 09/14] Fix Javadoc: Escape -> with {@literal ->} to prevent CI job errors --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 20ef6c6a457c5..e7b684edb8c99 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1382,8 +1382,8 @@ private Constants() { * fs.s3a.client.sts.custom.headers - Headers to add on all the STS requests. 
* * Examples: - * CustomHeader -> 'Header1:Value1' - * CustomHeaders -> 'Header1=Value1;Value2,Header2=Value1' + * CustomHeader {@literal ->} 'Header1:Value1' + * CustomHeaders {@literal ->} 'Header1=Value1;Value2,Header2=Value1' * */ public static final String CUSTOM_HEADERS_STS = From e6815cec9bab4a0f10d24e65deebf358cc8c073e Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Thu, 1 May 2025 15:31:10 -0700 Subject: [PATCH 10/14] Clean up double addition of constants --- .../org/apache/hadoop/fs/s3a/Constants.java | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index e7b684edb8c99..c22438bd8a1ce 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -852,30 +852,7 @@ private Constants() { public static final String SIGNING_ALGORITHM_STS = "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; - - /** Prefix for S3A client-specific properties. */ - public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; - - /** Custom headers postfix. */ - public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; - - /** - * List of custom headers to be set on the service client. - * Multiple parameters can be used to specify custom headers. - * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. - * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. 
- * Examples - * CustomHeader {@literal ->} 'Header1:Value1' - * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' - */ - public static final String CUSTOM_HEADERS_STS = - FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + CUSTOM_HEADERS_POSTFIX; - - public static final String CUSTOM_HEADERS_S3 = - FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + CUSTOM_HEADERS_POSTFIX; - + @Deprecated public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; From 05e373d267d5bcc2eb9716f5df3d16657ea8ce09 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Thu, 1 May 2025 16:56:24 -0700 Subject: [PATCH 11/14] Trim trailing whitespaces --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c22438bd8a1ce..361806545403b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -852,7 +852,7 @@ private Constants() { public static final String SIGNING_ALGORITHM_STS = "fs.s3a." 
+ Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; - + @Deprecated public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; From 9406435be3a98144799c7f3114093d6101304892 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Fri, 9 May 2025 11:28:15 -0700 Subject: [PATCH 12/14] Update documentation for configuring custom S3 and STS headers --- .../hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index b7f70021364e2..1f65caeb5e219 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -950,8 +950,7 @@ The switch to turn S3A auditing on or off. ### Configuring Custom Headers for AWS Service Clients -The S3A client allows users to set custom headers for specific AWS services, such as S3 and STS. -This feature allows you to set specific headers for S3 and STS service clients independently. +You can set custom headers for S3 and STS requests. These headers are set on client level, and will be sent for all requests made to these services. **Configuration Properties:** - `fs.s3a.client.s3.custom.headers`: Custom headers for S3 service requests. 
From 95a42953f88c3ad87eaba8ec6afe5640c55352cf Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Tue, 20 May 2025 10:27:18 -0700 Subject: [PATCH 13/14] Trim header values and update unit tests --- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 12 ++- .../fs/s3a/impl/TestAwsClientConfig.java | 85 ++++++++++++++++--- 2 files changed, 81 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 1ea589be7f47f..8bdb7fd8ea36b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -442,8 +443,15 @@ private static void initRequestHeaders(Configuration conf, Map awsClientCustomHeadersMap = S3AUtils.getTrimmedStringCollectionSplitByEquals(conf, configKey); awsClientCustomHeadersMap.forEach((header, valueString) -> { - List headerValues = Arrays.asList(valueString.split(";")); - clientConfig.putHeader(header, headerValues); + List headerValues = Arrays.stream(valueString.split(";")) + .map(String::trim) + .filter(v -> !v.isEmpty()) + .collect(Collectors.toList()); + if (!headerValues.isEmpty()) { + clientConfig.putHeader(header, headerValues); + } else { + LOG.warn("Ignoring header '{}' for {} client because no values were provided", header, awsServiceIdentifier); + } }); LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index 
859544f0878d1..a205fe52049b8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -216,7 +216,8 @@ private void setOptionsToValue(String value, Configuration conf, String... keys) @Test public void testInitRequestHeadersForSTS() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_STS, "foo=bar;baz,qux=quux"); + conf.set(CUSTOM_HEADERS_STS, "header1=value1;value2,header2=value3"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_S3)) .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) .isNull(); @@ -225,18 +226,21 @@ public void testInitRequestHeadersForSTS() throws IOException { .headers().size()) .describedAs("Count of S3 client headers") .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) .headers().size()) .describedAs("Count of STS client headers") .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) - .headers().get("foo")) - .describedAs("STS client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); + .headers().get("header1")) + .describedAs("STS client 'header1' header value") + .isEqualTo(Lists.newArrayList("value1", "value2")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) - .headers().get("qux")) - .describedAs("STS client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + .headers().get("header2")) + .describedAs("STS client 'header2' header value") + .isEqualTo(Lists.newArrayList("value3")); } /** @@ -246,7 +250,8 @@ public void testInitRequestHeadersForSTS() throws IOException { @Test public void testInitRequestHeadersForS3() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_S3, "foo=bar;baz,qux=quux"); + conf.set(CUSTOM_HEADERS_S3, 
"header1=value1;value2,header2=value3"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) .isNull(); @@ -255,17 +260,69 @@ public void testInitRequestHeadersForS3() throws IOException { .headers().size()) .describedAs("Count of STS client headers") .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) .headers().size()) .describedAs("Count of S3 client headers") .isEqualTo(2); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("header1")) + .describedAs("S3 client 'header1' header value") + .isEqualTo(Lists.newArrayList("value1", "value2")); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("header2")) + .describedAs("S3 client 'header2' header value") + .isEqualTo(Lists.newArrayList("value3")); + } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_S3} is set, + * verify that returned client configuration has desired headers set with whitespaces trimmed for headers and values. 
+ */ + @Test + public void testInitRequestHeadersForS3WithWhitespace() throws IOException { + final Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_S3, " header1 = value1 ; value2 , header2= value3 "); + + Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) + .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) + .isNull(); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(0); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) - .headers().get("foo")) - .describedAs("S3 client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); + .headers().get("header1")) + .describedAs("S3 client 'header1' header value") + .isEqualTo(Lists.newArrayList("value1", "value2")); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("header2")) + .describedAs("S3 client 'header2' header value") + .isEqualTo(Lists.newArrayList("value3")); + } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_S3} is set with duplicate values, + * verify that returned client configuration has desired headers with both values. 
+ */ + @Test + public void testInitRequestHeadersForS3WithDuplicateValues() throws IOException { + Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_S3, "header1=duplicate;duplicate"); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) - .headers().get("qux")) - .describedAs("S3 client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + .headers().get("header1")) + .describedAs("S3 client 'header1' header value") + .isEqualTo(Lists.newArrayList("duplicate", "duplicate")); } } From 74bfff3ff7394a00241452d142b63043e5133ebc Mon Sep 17 00:00:00 2001 From: Aditya Deshpande Date: Wed, 4 Jun 2025 08:26:50 -0700 Subject: [PATCH 14/14] Fix checkstyle errors --- .../java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java | 3 ++- .../org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 8bdb7fd8ea36b..274da46e7074a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -450,7 +450,8 @@ private static void initRequestHeaders(Configuration conf, if (!headerValues.isEmpty()) { clientConfig.putHeader(header, headerValues); } else { - LOG.warn("Ignoring header '{}' for {} client because no values were provided", header, awsServiceIdentifier); + LOG.warn("Ignoring header '{}' for {} client because no values were provided", + header, awsServiceIdentifier); } }); LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index a205fe52049b8..a199e5bc33541 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -279,7 +279,8 @@ public void testInitRequestHeadersForS3() throws IOException { /** * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_S3} is set, - * verify that returned client configuration has desired headers set with whitespaces trimmed for headers and values. + * verify that returned client configuration has desired headers set with + * whitespaces trimmed for headers and values. */ @Test public void testInitRequestHeadersForS3WithWhitespace() throws IOException {