From 872dcaccf211100c7c63b7a9badfb84ee14b2f87 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sun, 21 Jan 2024 22:54:52 -0800 Subject: [PATCH 01/19] HADOOP-19044. AWS SDK V2 - Update S3A region logic --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 4 ++ .../fs/s3a/ITestS3ACrossRegionAccess.java | 64 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 0a3267a9fe51d..2cfa8b19bc8e4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -320,6 +320,10 @@ private , ClientT> void origin = "SDK region chain"; } + if (parameters.getEndpoint().endsWith(CENTRAL_ENDPOINT)) { + builder.crossRegionAccessEnabled(true); + } + LOG.debug("Setting region to {} from {}", region, origin); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java new file mode 100644 index 0000000000000..17b67455c722b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Test to verify cross region bucket access. + */ +public class ITestS3ACrossRegionAccess extends AbstractS3ATestBase { + + @Test + public void testCentralEndpointCrossRegionAccess() throws Throwable { + describe("Create bucket on different region and access it using central endpoint"); + Configuration conf = getConfiguration(); + removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); + + Configuration newConf = new Configuration(conf); + + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); + + try (S3AFileSystem newFs = new S3AFileSystem()) { + newFs.initialize(getFileSystem().getUri(), newConf); + + final String file = getMethodName(); + Path basePath = new Path("basePath" + getMethodName()); + final Path srcDir = new Path(basePath, "srcdir"); + newFs.mkdirs(srcDir); + Path src = new Path(srcDir, file); + + try (FSDataOutputStream out = newFs.create(src)) { + out.write(new byte[] {1, 2, 3, 4, 5}); + } + ContractTestUtils.assertIsFile(getFileSystem(), new Path(srcDir, file)); + } + } + +} From d8c9793cb7fff70eebbd0bffc1d53a39baad48de Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sun, 21 Jan 2024 23:09:57 -0800 Subject: [PATCH 02/19] addendum - minor change to log and test --- .../java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 1 + .../org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 2cfa8b19bc8e4..c21457c813978 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -322,6 +322,7 @@ private , ClientT> void if (parameters.getEndpoint().endsWith(CENTRAL_ENDPOINT)) { builder.crossRegionAccessEnabled(true); + LOG.debug("Enabling cross region access for endpoint {}", parameters.getEndpoint()); } LOG.debug("Setting region to {} from {}", region, origin); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java index 17b67455c722b..39cfb26a05320 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java @@ -49,7 +49,7 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { newFs.initialize(getFileSystem().getUri(), newConf); final String file = getMethodName(); - Path basePath = new Path("basePath" + getMethodName()); + Path basePath = new Path("basePath-" + getMethodName()); final Path srcDir = new Path(basePath, "srcdir"); newFs.mkdirs(srcDir); Path src = new Path(srcDir, file); From cefb30b59f593f36ae04344c90a2292faea0635f Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sun, 21 Jan 2024 23:37:21 -0800 Subject: [PATCH 03/19] addendum --- .../java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index c21457c813978..d1dd75908adaf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -320,7 +320,7 @@ private , ClientT> void origin = "SDK region chain"; } - if (parameters.getEndpoint().endsWith(CENTRAL_ENDPOINT)) { + if (parameters.getEndpoint() != null && parameters.getEndpoint().endsWith(CENTRAL_ENDPOINT)) { builder.crossRegionAccessEnabled(true); LOG.debug("Enabling cross region access for endpoint {}", parameters.getEndpoint()); } From 81d345bfdc9c7e6ac62359de1378ceb1be14079e Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 00:16:38 -0800 Subject: [PATCH 04/19] addendum - use var for endpoint --- .../org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index d1dd75908adaf..5a9c89b78733b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -267,7 +267,8 @@ protected ClientOverrideConfiguration.Builder createClientOverrideConfiguration( */ private , ClientT> void configureEndpointAndRegion( BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { - URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); + final String endpointStr = parameters.getEndpoint(); + URI endpoint = getS3Endpoint(endpointStr, conf); String configuredRegion = parameters.getRegion(); Region region = null; @@ -294,7 +295,7 @@ private , ClientT> void builder.endpointOverride(endpoint); // No region was configured, try to determine it from the endpoint. if (region == null) { - region = getS3RegionFromEndpoint(parameters.getEndpoint()); + region = getS3RegionFromEndpoint(endpointStr); if (region != null) { origin = "endpoint"; } @@ -320,9 +321,9 @@ private , ClientT> void origin = "SDK region chain"; } - if (parameters.getEndpoint() != null && parameters.getEndpoint().endsWith(CENTRAL_ENDPOINT)) { + if (endpointStr != null && endpointStr.endsWith(CENTRAL_ENDPOINT)) { builder.crossRegionAccessEnabled(true); - LOG.debug("Enabling cross region access for endpoint {}", parameters.getEndpoint()); + LOG.debug("Enabling cross region access for endpoint {}", endpointStr); } LOG.debug("Setting region to {} from {}", region, origin); From 05225bb74669d5261a9cc376c1dd974c9f81643d Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 09:18:28 -0800 Subject: [PATCH 05/19] addendum - addressing comments for source changes --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 15 +++++++-------- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 5a9c89b78733b..95d55d571ab49 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -298,6 +298,10 @@ private , ClientT> void region = getS3RegionFromEndpoint(endpointStr); if (region != null) { origin = "endpoint"; + if (endpointStr.endsWith(CENTRAL_ENDPOINT)) { + builder.crossRegionAccessEnabled(true); + LOG.debug("Enabling cross region access for endpoint {}", endpointStr); + } } } LOG.debug("Setting endpoint to {}", endpoint); @@ -321,11 +325,6 @@ private , ClientT> void origin = "SDK region chain"; } - if (endpointStr != null && endpointStr.endsWith(CENTRAL_ENDPOINT)) { - builder.crossRegionAccessEnabled(true); - LOG.debug("Enabling cross region access for endpoint {}", endpointStr); - } - LOG.debug("Setting region to {} from {}", region, origin); } @@ -360,7 +359,7 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { /** * Parses the endpoint to get the region. - * If endpoint is the central one, use US_EAST_1. + * If endpoint is the central one, use US_EAST_2. * * @param endpoint the configure endpoint. * @return the S3 region, null if unable to resolve from endpoint. @@ -372,8 +371,8 @@ private static Region getS3RegionFromEndpoint(String endpoint) { return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } - // endpoint is for US_EAST_1; - return Region.US_EAST_1; + // endpoint is for US_EAST_2; + return Region.US_EAST_2; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 5e6991128b201..b366812935084 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -146,7 +146,7 @@ public void testCentralEndpoint() throws Throwable { describe("Create a client with the central endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, false); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_2, false); expectInterceptorException(client); } From 2c653da1b226438f658af9aa69f33781f78344ec Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 12:20:43 -0800 Subject: [PATCH 06/19] addendum - minor src change + test to delete dir --- .../apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 10 ++++++---- .../hadoop/fs/s3a/ITestS3ACrossRegionAccess.java | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 95d55d571ab49..e16092e5e8037 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -295,10 +295,11 @@ private , ClientT> void builder.endpointOverride(endpoint); // No region was configured, try to determine it from the endpoint. if (region == null) { - region = getS3RegionFromEndpoint(endpointStr); + boolean endpointEndsWithCentral = endpointStr.endsWith(CENTRAL_ENDPOINT); + region = getS3RegionFromEndpoint(endpointStr, endpointEndsWithCentral); if (region != null) { origin = "endpoint"; - if (endpointStr.endsWith(CENTRAL_ENDPOINT)) { + if (endpointEndsWithCentral) { builder.crossRegionAccessEnabled(true); LOG.debug("Enabling cross region access for endpoint {}", endpointStr); } @@ -362,11 +363,12 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * If endpoint is the central one, use US_EAST_2. * * @param endpoint the configure endpoint. + * @param endpointEndsWithCentral true if the endpoint is configured as central. * @return the S3 region, null if unable to resolve from endpoint. */ - private static Region getS3RegionFromEndpoint(String endpoint) { + private static Region getS3RegionFromEndpoint(String endpoint, boolean endpointEndsWithCentral) { - if(!endpoint.endsWith(CENTRAL_ENDPOINT)) { + if (!endpointEndsWithCentral) { LOG.debug("Endpoint {} is not the default; parsing", endpoint); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java index 39cfb26a05320..dedb9435d68bf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java @@ -58,6 +58,7 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { out.write(new byte[] {1, 2, 3, 4, 5}); } ContractTestUtils.assertIsFile(getFileSystem(), new Path(srcDir, file)); + newFs.delete(srcDir, true); } } From 99cf2d4e1068c4a2164fa911cb14df5ac61e32d6 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 12:43:55 -0800 Subject: [PATCH 07/19] addendum - test with endpoint & region --- .../apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index b366812935084..ad0c0a90ca8f9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -151,6 +151,16 @@ public void testCentralEndpoint() throws Throwable { expectInterceptorException(client); } + @Test + public void testCentralEndpointWithRegion() throws Throwable { + describe("Create a client with the central endpoint but also specify region"); + Configuration conf = getConfiguration(); + + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_WEST_2, false); + + expectInterceptorException(client); + } + @Test public void testWithRegionConfig() throws Throwable { describe("Create a client with a configured region"); From 12ff2f156b4fb0887e56d6b44a32dbff0d739c26 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 13:33:56 -0800 Subject: [PATCH 08/19] addendum --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 4408cf68a451e..5d380a00526c3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1327,7 +1327,7 @@ private Constants() { * The special S3 region which can be used to talk to any bucket. * Value {@value}. */ - public static final String AWS_S3_CENTRAL_REGION = "us-east-1"; + public static final String AWS_S3_CENTRAL_REGION = "us-east-2"; /** * The default S3 region when using cross region client. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index e16092e5e8037..953bd29fd4470 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -53,6 +53,7 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -373,8 +374,8 @@ private static Region getS3RegionFromEndpoint(String endpoint, boolean endpointE return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } - // endpoint is for US_EAST_2; - return Region.US_EAST_2; + // endpoint for central region + return Region.of(AWS_S3_CENTRAL_REGION); } } From fcf56cf8393d0562b84077b914639e094ec5fdbf Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 22 Jan 2024 18:11:21 -0800 Subject: [PATCH 09/19] addendum --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 5d380a00526c3..4d2ccdf88bcec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1324,16 +1324,16 @@ private Constants() { public static final String AWS_REGION = "fs.s3a.endpoint.region"; /** - * The special S3 region which can be used to talk to any bucket. + * The default S3 region when using cross region client. * Value {@value}. */ - public static final String AWS_S3_CENTRAL_REGION = "us-east-2"; + public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; /** - * The default S3 region when using cross region client. + * The special S3 region which can be used to talk to any bucket. * Value {@value}. */ - public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; + public static final String AWS_S3_CENTRAL_REGION = AWS_S3_DEFAULT_REGION; /** * Is the endpoint a FIPS endpoint? From 61977231dce6d19dbb79371496fba1999f940f25 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 23 Jan 2024 11:14:23 -0800 Subject: [PATCH 10/19] addendum - addressing latest comments --- .../org/apache/hadoop/fs/s3a/Constants.java | 8 +-- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 16 +++-- .../fs/s3a/ITestS3ACrossRegionAccess.java | 65 ------------------- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 35 ++++++++++ 4 files changed, 51 insertions(+), 73 deletions(-) delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 4d2ccdf88bcec..4408cf68a451e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1324,16 +1324,16 @@ private Constants() { public static final String AWS_REGION = "fs.s3a.endpoint.region"; /** - * The default S3 region when using cross region client. + * The special S3 region which can be used to talk to any bucket. * Value {@value}. */ - public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; + public static final String AWS_S3_CENTRAL_REGION = "us-east-1"; /** - * The special S3 region which can be used to talk to any bucket. + * The default S3 region when using cross region client. * Value {@value}. */ - public static final String AWS_S3_CENTRAL_REGION = AWS_S3_DEFAULT_REGION; + public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; /** * Is the endpoint a FIPS endpoint? diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 953bd29fd4470..ec9e50b743a03 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -53,7 +53,6 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -367,15 +366,24 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * @param endpointEndsWithCentral true if the endpoint is configured as central. * @return the S3 region, null if unable to resolve from endpoint. */ - private static Region getS3RegionFromEndpoint(String endpoint, boolean endpointEndsWithCentral) { + private static Region getS3RegionFromEndpoint(final String endpoint, + final boolean endpointEndsWithCentral) { if (!endpointEndsWithCentral) { LOG.debug("Endpoint {} is not the default; parsing", endpoint); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } - // endpoint for central region - return Region.of(AWS_S3_CENTRAL_REGION); + // Select default region here to enable cross-region access. + // If both "fs.s3a.endpoint" and "fs.s3a.endpoint.region" are empty, + // Spark sets "fs.s3a.endpoint" to "s3.amazonaws.com". + // ref: + // https://github.com/apache/spark/blob/v3.5.0/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L528 + // If we do not allow cross region access, Spark would not be able to + // access any bucket that is not present in the given region. + // Hence, we should use default region us-east-2 to allow cross-region + // access. + return Region.of(AWS_S3_DEFAULT_REGION); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java deleted file mode 100644 index dedb9435d68bf..0000000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACrossRegionAccess.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import org.junit.Test; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; -import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; - -/** - * Test to verify cross region bucket access. - */ -public class ITestS3ACrossRegionAccess extends AbstractS3ATestBase { - - @Test - public void testCentralEndpointCrossRegionAccess() throws Throwable { - describe("Create bucket on different region and access it using central endpoint"); - Configuration conf = getConfiguration(); - removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); - - Configuration newConf = new Configuration(conf); - - newConf.set(ENDPOINT, CENTRAL_ENDPOINT); - - try (S3AFileSystem newFs = new S3AFileSystem()) { - newFs.initialize(getFileSystem().getUri(), newConf); - - final String file = getMethodName(); - Path basePath = new Path("basePath-" + getMethodName()); - final Path srcDir = new Path(basePath, "srcdir"); - newFs.mkdirs(srcDir); - Path src = new Path(srcDir, file); - - try (FSDataOutputStream out = newFs.create(src)) { - out.write(new byte[] {1, 2, 3, 4, 5}); - } - ContractTestUtils.assertIsFile(getFileSystem(), new Path(srcDir, file)); - newFs.delete(srcDir, true); - } - } - -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index ad0c0a90ca8f9..78ca8d3ecd9a2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -38,10 +38,14 @@ import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; @@ -159,6 +163,10 @@ public void testCentralEndpointWithRegion() throws Throwable { S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_WEST_2, false); expectInterceptorException(client); + + client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_1, false); + + expectInterceptorException(client); } @Test @@ -267,6 +275,33 @@ public void testWithVPCE() throws Throwable { expectInterceptorException(client); } + @Test + public void testCentralEndpointCrossRegionAccess() throws Throwable { + describe("Create bucket on different region and access it using central endpoint"); + Configuration conf = getConfiguration(); + removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); + + Configuration newConf = new Configuration(conf); + + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); + + newFS = new S3AFileSystem(); + newFS.initialize(getFileSystem().getUri(), newConf); + + final String file = getMethodName(); + Path basePath = new Path("basePath-" + getMethodName()); + final Path srcDir = new Path(basePath, "srcdir"); + newFS.mkdirs(srcDir); + Path src = new Path(srcDir, file); + + try (FSDataOutputStream out = newFS.create(src)) { + out.write(new byte[] {1, 2, 3}); + } + + ContractTestUtils.assertIsFile(getFileSystem(), new Path(srcDir, file)); + newFS.delete(srcDir, true); + } + private final class RegionInterceptor implements ExecutionInterceptor { private final String endpoint; private final String region; From adb10d30530f180813182bbc262bda9be89f4db5 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 25 Jan 2024 19:23:42 -0800 Subject: [PATCH 11/19] addendum - addressing reviews from aws + hadoop PR --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 19 ++++++++++++++----- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index ec9e50b743a03..45af9c13c5e3d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -290,22 +290,30 @@ private , ClientT> void builder.fipsEnabled(fipsEnabled); if (endpoint != null) { + boolean overrideEndpoint = true; checkArgument(!fipsEnabled, "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint); - builder.endpointOverride(endpoint); // No region was configured, try to determine it from the endpoint. if (region == null) { - boolean endpointEndsWithCentral = endpointStr.endsWith(CENTRAL_ENDPOINT); - region = getS3RegionFromEndpoint(endpointStr, endpointEndsWithCentral); + boolean endpointEndsWithCentral = + endpointStr.endsWith(CENTRAL_ENDPOINT); + region = getS3RegionFromEndpoint(endpointStr, + endpointEndsWithCentral); if (region != null) { origin = "endpoint"; if (endpointEndsWithCentral) { + overrideEndpoint = false; builder.crossRegionAccessEnabled(true); - LOG.debug("Enabling cross region access for endpoint {}", endpointStr); + origin = "origin with cross region access"; + LOG.debug("Enabling cross region access for endpoint {}", + endpointStr); } } } - LOG.debug("Setting endpoint to {}", endpoint); + if (overrideEndpoint) { + builder.endpointOverride(endpoint); + LOG.debug("Setting endpoint to {}", endpoint); + } } if (region != null) { @@ -377,6 +385,7 @@ private static Region getS3RegionFromEndpoint(final String endpoint, // Select default region here to enable cross-region access. // If both "fs.s3a.endpoint" and "fs.s3a.endpoint.region" are empty, // Spark sets "fs.s3a.endpoint" to "s3.amazonaws.com". + // This applies to Spark versions with the changes of SPARK-35878. // ref: // https://github.com/apache/spark/blob/v3.5.0/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L528 // If we do not allow cross region access, Spark would not be able to diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 78ca8d3ecd9a2..f42b1cea2c0f1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -289,7 +289,7 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { newFS.initialize(getFileSystem().getUri(), newConf); final String file = getMethodName(); - Path basePath = new Path("basePath-" + getMethodName()); + Path basePath = methodPath(); final Path srcDir = new Path(basePath, "srcdir"); newFS.mkdirs(srcDir); Path src = new Path(srcDir, file); @@ -317,7 +317,7 @@ private final class RegionInterceptor implements ExecutionInterceptor { public void beforeExecution(Context.BeforeExecution context, ExecutionAttributes executionAttributes) { - if (endpoint != null) { + if (endpoint != null && (!endpoint.endsWith(CENTRAL_ENDPOINT) || !US_EAST_2.equals(region))) { Assertions.assertThat( executionAttributes.getAttribute(AwsExecutionAttribute.ENDPOINT_OVERRIDDEN)) .describedAs("Endpoint not overridden").isTrue(); From 54e5c66187c445b91f253672caa3d39a18a25173 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 25 Jan 2024 22:03:18 -0800 Subject: [PATCH 12/19] addendum - checkstyle --- .../java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 45af9c13c5e3d..c0ff97a167160 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -387,7 +387,8 @@ private static Region getS3RegionFromEndpoint(final String endpoint, // Spark sets "fs.s3a.endpoint" to "s3.amazonaws.com". // This applies to Spark versions with the changes of SPARK-35878. // ref: - // https://github.com/apache/spark/blob/v3.5.0/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L528 + // https://github.com/apache/spark/blob/v3.5.0/core/ + // src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L528 // If we do not allow cross region access, Spark would not be able to // access any bucket that is not present in the given region. // Hence, we should use default region us-east-2 to allow cross-region From e65a51acf88e92141e13e84a9665d4578ae27620 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 26 Jan 2024 14:50:57 -0800 Subject: [PATCH 13/19] addendum - minor changes: comment + test --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 9 ++++++-- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 21 +++++++++++++------ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index c0ff97a167160..a11be66466b98 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -268,9 +268,9 @@ protected ClientOverrideConfiguration.Builder createClientOverrideConfiguration( private , ClientT> void configureEndpointAndRegion( BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { final String endpointStr = parameters.getEndpoint(); - URI endpoint = getS3Endpoint(endpointStr, conf); + final URI endpoint = getS3Endpoint(endpointStr, conf); - String configuredRegion = parameters.getRegion(); + final String configuredRegion = parameters.getRegion(); Region region = null; String origin = ""; @@ -302,6 +302,11 @@ private , ClientT> void if (region != null) { origin = "endpoint"; if (endpointEndsWithCentral) { + // No need to override endpoint with "s3.amazonaws.com". + // Let the client take care of endpoint resolution. Overriding + // the endpoint with "s3.amazonaws.com" causes 400 Bad Request + // errors for non-existent buckets and objects. + // ref: https://github.com/aws/aws-sdk-java-v2/issues/4846 overrideEndpoint = false; builder.crossRegionAccessEnabled(true); origin = "origin with cross region access"; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index f42b1cea2c0f1..51f19cec48e2c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -278,10 +278,10 @@ public void testWithVPCE() throws Throwable { @Test public void testCentralEndpointCrossRegionAccess() throws Throwable { describe("Create bucket on different region and access it using central endpoint"); - Configuration conf = getConfiguration(); + final Configuration conf = getConfiguration(); removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); - Configuration newConf = new Configuration(conf); + final Configuration newConf = new Configuration(conf); newConf.set(ENDPOINT, CENTRAL_ENDPOINT); @@ -289,17 +289,26 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { newFS.initialize(getFileSystem().getUri(), newConf); final String file = getMethodName(); - Path basePath = methodPath(); + final Path basePath = methodPath(); final Path srcDir = new Path(basePath, "srcdir"); newFS.mkdirs(srcDir); - Path src = new Path(srcDir, file); + Path srcFilePath = new Path(srcDir, file); - try (FSDataOutputStream out = newFS.create(src)) { + try (FSDataOutputStream out = newFS.create(srcFilePath)) { out.write(new byte[] {1, 2, 3}); } - ContractTestUtils.assertIsFile(getFileSystem(), new Path(srcDir, file)); + ContractTestUtils.assertIsFile(getFileSystem(), srcFilePath); newFS.delete(srcDir, true); + + Assertions + .assertThat(newFS.exists(srcFilePath)) + .describedAs("Existence of file: " + srcFilePath) + .isFalse(); + Assertions + .assertThat(getFileSystem().exists(srcFilePath)) + .describedAs("Existence of file: " + srcFilePath) + .isFalse(); } private final class RegionInterceptor implements ExecutionInterceptor { From 2b89855a6b6b92a05cf49994ff17ca5f8397a16a Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 26 Jan 2024 14:55:50 -0800 Subject: [PATCH 14/19] addendum - minor test change --- .../java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 51f19cec48e2c..a0a01a3cdca36 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -299,6 +299,8 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { } ContractTestUtils.assertIsFile(getFileSystem(), srcFilePath); + ContractTestUtils.assertIsFile(newFS, srcFilePath); + newFS.delete(srcDir, true); Assertions From 5fdb23d70f32398d16718dc2f62db6cefae2a7eb Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 29 Jan 2024 17:27:29 -0800 Subject: [PATCH 15/19] addendum - ignore region configured for central endpoint + tests --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 9 ++++--- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 27 ++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index a11be66466b98..0ee4f3513c63e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -293,10 +293,11 @@ private , ClientT> void boolean overrideEndpoint = true; checkArgument(!fipsEnabled, "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint); - // No region was configured, try to determine it from the endpoint. - if (region == null) { - boolean endpointEndsWithCentral = - endpointStr.endsWith(CENTRAL_ENDPOINT); + boolean endpointEndsWithCentral = + endpointStr.endsWith(CENTRAL_ENDPOINT); + // No region was configured or the endpoint is central, + // determine the region from the endpoint. + if (region == null || endpointEndsWithCentral) { region = getS3RegionFromEndpoint(endpointStr, endpointEndsWithCentral); if (region != null) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index a0a01a3cdca36..bc9ffc09a84e0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -160,11 +160,11 @@ public void testCentralEndpointWithRegion() throws Throwable { describe("Create a client with the central endpoint but also specify region"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_WEST_2, false); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_EAST_2, false); expectInterceptorException(client); - client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_1, false); + client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_2, false); expectInterceptorException(client); } @@ -279,6 +279,23 @@ public void testWithVPCE() throws Throwable { public void testCentralEndpointCrossRegionAccess() throws Throwable { describe("Create bucket on different region and access it using central endpoint"); final Configuration conf = getConfiguration(); + removeBaseAndBucketOverrides(conf, ENDPOINT); + + final Configuration newConf = new Configuration(conf); + + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); + + newFS = new S3AFileSystem(); + newFS.initialize(getFileSystem().getUri(), newConf); + + assertOpsUsingNewFs(); + } + + @Test + public void testCentralEndpointWithNullRegionCrossRegionAccess() throws Throwable { + describe( + "Create bucket on different region and access it using central endpoint and null region"); + final Configuration conf = getConfiguration(); removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); final Configuration newConf = new Configuration(conf); @@ -288,6 +305,10 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { newFS = new S3AFileSystem(); newFS.initialize(getFileSystem().getUri(), newConf); + assertOpsUsingNewFs(); + } + + private void assertOpsUsingNewFs() throws IOException { final String file = getMethodName(); final Path basePath = methodPath(); final Path srcDir = new Path(basePath, "srcdir"); @@ -328,7 +349,7 @@ private final class RegionInterceptor implements ExecutionInterceptor { public void beforeExecution(Context.BeforeExecution context, ExecutionAttributes executionAttributes) { - if (endpoint != null && (!endpoint.endsWith(CENTRAL_ENDPOINT) || !US_EAST_2.equals(region))) { + if (endpoint != null && !endpoint.endsWith(CENTRAL_ENDPOINT)) { Assertions.assertThat( executionAttributes.getAttribute(AwsExecutionAttribute.ENDPOINT_OVERRIDDEN)) .describedAs("Endpoint not overridden").isTrue(); From b1f4df925e5883eae54a3ba08f1172b2738e825c Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 30 Jan 2024 13:50:51 -0800 Subject: [PATCH 16/19] central endpoint src + more tests + doc update --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 31 ++++++++------- .../tools/hadoop-aws/aws_sdk_v2_changelog.md | 18 ++++++++- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 38 ++++++++++++++++--- 3 files changed, 63 insertions(+), 24 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 0ee4f3513c63e..8b8bf1283082e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -290,35 +290,34 @@ private , ClientT> void builder.fipsEnabled(fipsEnabled); if (endpoint != null) { - boolean overrideEndpoint = true; checkArgument(!fipsEnabled, "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint); boolean endpointEndsWithCentral = endpointStr.endsWith(CENTRAL_ENDPOINT); - // No region was configured or the endpoint is central, + + // No region was configured, // determine the region from the endpoint. - if (region == null || endpointEndsWithCentral) { + if (region == null) { region = getS3RegionFromEndpoint(endpointStr, endpointEndsWithCentral); if (region != null) { origin = "endpoint"; - if (endpointEndsWithCentral) { - // No need to override endpoint with "s3.amazonaws.com". - // Let the client take care of endpoint resolution. Overriding - // the endpoint with "s3.amazonaws.com" causes 400 Bad Request - // errors for non-existent buckets and objects. - // ref: https://github.com/aws/aws-sdk-java-v2/issues/4846 - overrideEndpoint = false; - builder.crossRegionAccessEnabled(true); - origin = "origin with cross region access"; - LOG.debug("Enabling cross region access for endpoint {}", - endpointStr); - } } } - if (overrideEndpoint) { + + // No need to override endpoint with "s3.amazonaws.com". + // Let the client take care of endpoint resolution. Overriding + // the endpoint with "s3.amazonaws.com" causes 400 Bad Request + // errors for non-existent buckets and objects. + // ref: https://github.com/aws/aws-sdk-java-v2/issues/4846 + if (!endpointEndsWithCentral) { builder.endpointOverride(endpoint); LOG.debug("Setting endpoint to {}", endpoint); + } else { + builder.crossRegionAccessEnabled(true); + origin = "origin with cross region access"; + LOG.debug("Enabling cross region access for endpoint {}", + endpointStr); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md index 162f15951f5ca..bf2e416bab329 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md @@ -83,8 +83,8 @@ The table below lists the configurations S3A was using and what they now map to. Previously, if no endpoint and region was configured, fall back to using us-east-1. Set withForceGlobalBucketAccessEnabled(true) which will allow access to buckets not in this region too. -Since the SDK V2 no longer supports cross region access, we need to set the region and endpoint of -the bucket. The behaviour has now been changed to: +Since the SDK V2 no longer supports cross region access by default, we need to set the region and +endpoint of the bucket. The behaviour has now been changed to: * If no endpoint is specified, use s3.amazonaws.com. * When setting the endpoint, also set the protocol (HTTP or HTTPS) @@ -93,6 +93,20 @@ the bucket. The behaviour has now been changed to: response. Otherwise it will throw an error with status code 301 permanently moved. This error contains the region of the bucket in its header, which we can then use to configure the client. +The behaviour with central endpoint: + +* If fs.s3a.endpoint is set to s3.amazonaws.com and fs.s3a.endpoint.region is not set, S3A + sets the SDK region to us-east-2. +* If fs.s3a.endpoint is set to s3.amazonaws.com, S3A does not override SDK endpoint, leaving + the endpoint resolution to the SDK. +* If fs.s3a.endpoint is set to s3.amazonaws.com and fs.s3a.endpoint.region is set, the SDK + region will be set to the same value as fs.s3a.endpoint.region value. +* If fs.s3a.endpoint is set to s3.amazonaws.com, S3A enables cross region access using SDK API + crossRegionAccessEnabled(true). + +The central endpoint behaviour changes are introduced by +[HADOOP-19044](https://issues.apache.org/jira/browse/HADOOP-19044). + ### List Object: There is no way to paginate the listObject V1 result, we are diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index bc9ffc09a84e0..b62b741e919a0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -160,11 +160,19 @@ public void testCentralEndpointWithRegion() throws Throwable { describe("Create a client with the central endpoint but also specify region"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_EAST_2, false); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_WEST_2, false); expectInterceptorException(client); - client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_2, false); + client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_1, false); + + expectInterceptorException(client); + + client = createS3Client(conf, CENTRAL_ENDPOINT, EU_WEST_2, EU_WEST_2, false); + + expectInterceptorException(client); + + client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_2, US_EAST_2, false); expectInterceptorException(client); } @@ -276,14 +284,32 @@ public void testWithVPCE() throws Throwable { } @Test - public void testCentralEndpointCrossRegionAccess() throws Throwable { - describe("Create bucket on different region and access it using central endpoint"); + public void testCentralEndpointWithUSWest2Region() throws Throwable { + describe("Access bucket using central endpoint and us-west-2 region"); final Configuration conf = getConfiguration(); - removeBaseAndBucketOverrides(conf, ENDPOINT); + removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); + + final Configuration newConf = new Configuration(conf); + + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); + newConf.set(AWS_REGION, US_WEST_2); + + newFS = new S3AFileSystem(); + newFS.initialize(getFileSystem().getUri(), newConf); + + assertOpsUsingNewFs(); + } + + @Test + public void testCentralEndpointWithEUWest2Region() throws Throwable { + describe("Access bucket using central endpoint and eu-west-2 region"); + final Configuration conf = getConfiguration(); + removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); final Configuration newConf = new Configuration(conf); newConf.set(ENDPOINT, CENTRAL_ENDPOINT); + newConf.set(AWS_REGION, EU_WEST_2); newFS = new S3AFileSystem(); newFS.initialize(getFileSystem().getUri(), newConf); @@ -292,7 +318,7 @@ public void testCentralEndpointCrossRegionAccess() throws Throwable { } @Test - public void testCentralEndpointWithNullRegionCrossRegionAccess() throws Throwable { + public void testCentralEndpointWithNullRegion() throws Throwable { describe( "Create bucket on different region and access it using central endpoint and null region"); final Configuration conf = getConfiguration(); From 44760ca66a3cb735f56c1d03948ced426b8d2fc0 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 1 Feb 2024 00:09:12 -0800 Subject: [PATCH 17/19] addressing reviews - origin change + public bucket tests + doc updates --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- .../tools/hadoop-aws/aws_sdk_v2_changelog.md | 16 +--- .../markdown/tools/hadoop-aws/connecting.md | 36 +++++++ .../site/markdown/tools/hadoop-aws/index.md | 2 + .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 96 +++++++++++++------ .../fs/s3a/test/PublicDatasetTestUtils.java | 7 ++ 6 files changed, 115 insertions(+), 44 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 8b8bf1283082e..284ba8e6ae5c9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -315,7 +315,7 @@ private , ClientT> void LOG.debug("Setting endpoint to {}", endpoint); } else { builder.crossRegionAccessEnabled(true); - origin = "origin with cross region access"; + origin = "central endpoint with cross region access"; LOG.debug("Enabling cross region access for endpoint {}", endpointStr); } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md index bf2e416bab329..de3808c54dcb9 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_v2_changelog.md @@ -83,7 +83,7 @@ The table below lists the configurations S3A was using and what they now map to. Previously, if no endpoint and region was configured, fall back to using us-east-1. Set withForceGlobalBucketAccessEnabled(true) which will allow access to buckets not in this region too. -Since the SDK V2 no longer supports cross region access by default, we need to set the region and +Since the SDK V2 no longer supports cross region access, we need to set the region and endpoint of the bucket. The behaviour has now been changed to: * If no endpoint is specified, use s3.amazonaws.com. @@ -93,20 +93,6 @@ endpoint of the bucket. The behaviour has now been changed to: response. Otherwise it will throw an error with status code 301 permanently moved. This error contains the region of the bucket in its header, which we can then use to configure the client. -The behaviour with central endpoint: - -* If fs.s3a.endpoint is set to s3.amazonaws.com and fs.s3a.endpoint.region is not set, S3A - sets the SDK region to us-east-2. -* If fs.s3a.endpoint is set to s3.amazonaws.com, S3A does not override SDK endpoint, leaving - the endpoint resolution to the SDK. -* If fs.s3a.endpoint is set to s3.amazonaws.com and fs.s3a.endpoint.region is set, the SDK - region will be set to the same value as fs.s3a.endpoint.region value. -* If fs.s3a.endpoint is set to s3.amazonaws.com, S3A enables cross region access using SDK API - crossRegionAccessEnabled(true). - -The central endpoint behaviour changes are introduced by -[HADOOP-19044](https://issues.apache.org/jira/browse/HADOOP-19044). - ### List Object: There is no way to paginate the listObject V1 result, we are diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index 600e1e128a2c8..7f37b60c2e621 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -99,6 +99,42 @@ With the move to the AWS V2 SDK, there is more emphasis on the region, set by th Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store. +### S3 endpoint and region settings in detail + +* Configs `fs.s3a.endpoint` and `fs.s3a.endpoint.region` are used to set values + for S3 endpoint and region respectively. +* If `fs.s3a.endpoint.region` is configured with valid AWS region value, S3A will + configure the S3 client to use this value. If this is set to a region that does + not match your bucket, you will receive a 301 redirect response. +* If `fs.s3a.endpoint.region` is not set and `fs.s3a.endpoint` is set with valid + endpoint value, S3A will attempt to parse the region from the endpoint and + configure S3 client to use the region value. +* If both `fs.s3a.endpoint` and `fs.s3a.endpoint.region` are not set, S3A will + use `us-east-2` as default region and enable cross region access. In this case, + S3A does not attempt to override the endpoint while configuring the S3 client. +* If `fs.s3a.endpoint` is not set and `fs.s3a.endpoint.region` is set to an empty + string, S3A will configure S3 client without any region or endpoint override. + This will allow fallback to S3 SDK region resolution chain. More details + [here](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/region-selection.html). +* If `fs.s3a.endpoint` is set to central endpoint `s3.amazonaws.com` and + `fs.s3a.endpoint.region` is not set, S3A will use `us-east-2` as default region + and enable cross region access. In this case, S3A does not attempt to override + the endpoint while configuring the S3 client. +* If `fs.s3a.endpoint` is set to central endpoint `s3.amazonaws.com` and + `fs.s3a.endpoint.region` is also set to some region, S3A will use that region + value and enable cross region access. In this case, S3A does not attempt to + override the endpoint while configuring the S3 client. + +When the cross region access is enabled while configuring the S3 client, even if the +region set is incorrect, S3 SDK determines the region. This is done by making the +request, and if the SDK receives 301 redirect response, it determines the region at +the cost of a HEAD request, and caches it. + +Please note that some endpoint and region settings that require cross region access +are complex and improving over time. Hence, they may be considered unstable. + +If you are working with third party stores, please check [third party stores in detail](third_party_stores.html). + ### Network timeouts See [Timeouts](performance.html#timeouts). diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 0f09c7f873152..b3e41a632a487 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -228,6 +228,8 @@ If you do any of these: change your credentials immediately! See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md). +Also, please check [S3 endpoint and region settings in detail](connecting.html#s3_endpoint_region_details). + ## Authenticating with S3 Except when interacting with public S3 buckets, the S3A client diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index b62b741e919a0..7c3e6bc59600d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -42,13 +42,16 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; +import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils; +import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils.DEFAULT_REQUESTER_PAYS_BUCKET_NAME; import static org.apache.hadoop.io.IOUtils.closeStream; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -160,21 +163,16 @@ public void testCentralEndpointWithRegion() throws Throwable { describe("Create a client with the central endpoint but also specify region"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, US_WEST_2, false); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, US_WEST_2, + US_WEST_2, false); expectInterceptorException(client); - client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, US_EAST_1, false); + client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_1, + US_EAST_1, false); expectInterceptorException(client); - client = createS3Client(conf, CENTRAL_ENDPOINT, EU_WEST_2, EU_WEST_2, false); - - expectInterceptorException(client); - - client = createS3Client(conf, CENTRAL_ENDPOINT, US_EAST_2, US_EAST_2, false); - - expectInterceptorException(client); } @Test @@ -284,48 +282,90 @@ public void testWithVPCE() throws Throwable { } @Test - public void testCentralEndpointWithUSWest2Region() throws Throwable { - describe("Access bucket using central endpoint and us-west-2 region"); + public void testCentralEndpointAndDifferentRegion() throws Throwable { + describe("Access public bucket using central endpoint and region " + + "different than that of the public bucket"); final Configuration conf = getConfiguration(); - removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); - final Configuration newConf = new Configuration(conf); + removeBaseAndBucketOverrides( + newConf, + ENDPOINT, + AWS_REGION, + ALLOW_REQUESTER_PAYS, + KEY_REQUESTER_PAYS_FILE); + + removeBaseAndBucketOverrides( + DEFAULT_REQUESTER_PAYS_BUCKET_NAME, + newConf, + ENDPOINT, + AWS_REGION, + ALLOW_REQUESTER_PAYS, + KEY_REQUESTER_PAYS_FILE); + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); - newConf.set(AWS_REGION, US_WEST_2); + newConf.set(AWS_REGION, EU_WEST_1); + newConf.setBoolean(ALLOW_REQUESTER_PAYS, true); - newFS = new S3AFileSystem(); - newFS.initialize(getFileSystem().getUri(), newConf); + Path filePath = new Path(PublicDatasetTestUtils + .getRequesterPaysObject(newConf)); + newFS = (S3AFileSystem) filePath.getFileSystem(newConf); - assertOpsUsingNewFs(); + Assertions + .assertThat(newFS.exists(filePath)) + .describedAs("Existence of path: " + filePath) + .isTrue(); } @Test - public void testCentralEndpointWithEUWest2Region() throws Throwable { - describe("Access bucket using central endpoint and eu-west-2 region"); + public void testCentralEndpointAndSameRegion() throws Throwable { + describe("Access public bucket using central endpoint and region " + + "same as that of the public bucket"); final Configuration conf = getConfiguration(); - removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); - final Configuration newConf = new Configuration(conf); + removeBaseAndBucketOverrides( + newConf, + ENDPOINT, + AWS_REGION, + ALLOW_REQUESTER_PAYS, + KEY_REQUESTER_PAYS_FILE); + + removeBaseAndBucketOverrides( + DEFAULT_REQUESTER_PAYS_BUCKET_NAME, + newConf, + ENDPOINT, + AWS_REGION, + ALLOW_REQUESTER_PAYS, + KEY_REQUESTER_PAYS_FILE); + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); - newConf.set(AWS_REGION, EU_WEST_2); + newConf.set(AWS_REGION, US_WEST_2); + newConf.setBoolean(ALLOW_REQUESTER_PAYS, true); - newFS = new S3AFileSystem(); - newFS.initialize(getFileSystem().getUri(), newConf); + Path filePath = new Path(PublicDatasetTestUtils + .getRequesterPaysObject(newConf)); + newFS = (S3AFileSystem) filePath.getFileSystem(newConf); - assertOpsUsingNewFs(); + Assertions + .assertThat(newFS.exists(filePath)) + .describedAs("Existence of path: " + filePath) + .isTrue(); } @Test public void testCentralEndpointWithNullRegion() throws Throwable { - describe( - "Create bucket on different region and access it using central endpoint and null region"); + describe("Create bucket on different region and access it using" + + " central endpoint and null region"); final Configuration conf = getConfiguration(); - removeBaseAndBucketOverrides(conf, ENDPOINT, AWS_REGION); final Configuration newConf = new Configuration(conf); + removeBaseAndBucketOverrides( + newConf, + ENDPOINT, + AWS_REGION); + newConf.set(ENDPOINT, CENTRAL_ENDPOINT); newFS = new S3AFileSystem(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/PublicDatasetTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/PublicDatasetTestUtils.java index 669acd8b8bd56..7ef2449b8e83f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/PublicDatasetTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/PublicDatasetTestUtils.java @@ -53,6 +53,13 @@ private PublicDatasetTestUtils() {} private static final String DEFAULT_REQUESTER_PAYS_FILE = "s3a://usgs-landsat/collection02/catalog.json"; + /** + * Default bucket name for the requester pays bucket. + * Value = {@value}. + */ + public static final String DEFAULT_REQUESTER_PAYS_BUCKET_NAME = + "usgs-landsat"; + /** * Default bucket for an S3A file system with many objects: {@value}. * From 12e503eb1eb25f66b7321edcaee33d02504be825 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 1 Feb 2024 00:24:43 -0800 Subject: [PATCH 18/19] minor test changes --- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 7c3e6bc59600d..6d246e8f85ad5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -38,9 +38,9 @@ import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils; @@ -282,7 +282,7 @@ public void testWithVPCE() throws Throwable { } @Test - public void testCentralEndpointAndDifferentRegion() throws Throwable { + public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { describe("Access public bucket using central endpoint and region " + "different than that of the public bucket"); final Configuration conf = getConfiguration(); @@ -318,7 +318,7 @@ public void testCentralEndpointAndDifferentRegion() throws Throwable { } @Test - public void testCentralEndpointAndSameRegion() throws Throwable { + public void testCentralEndpointAndSameRegionAsBucket() throws Throwable { describe("Access public bucket using central endpoint and region " + "same as that of the public bucket"); final Configuration conf = getConfiguration(); @@ -354,9 +354,9 @@ public void testCentralEndpointAndSameRegion() throws Throwable { } @Test - public void testCentralEndpointWithNullRegion() throws Throwable { - describe("Create bucket on different region and access it using" - + " central endpoint and null region"); + public void testCentralEndpointAndNullRegionWithCRUD() throws Throwable { + describe("Access the test bucket using central endpoint and" + + " null region, perform file system CRUD operations"); final Configuration conf = getConfiguration(); final Configuration newConf = new Configuration(conf); @@ -385,8 +385,27 @@ private void assertOpsUsingNewFs() throws IOException { out.write(new byte[] {1, 2, 3}); } - ContractTestUtils.assertIsFile(getFileSystem(), srcFilePath); - ContractTestUtils.assertIsFile(newFS, srcFilePath); + Assertions + .assertThat(newFS.exists(srcFilePath)) + .describedAs("Existence of file: " + srcFilePath) + .isTrue(); + Assertions + .assertThat(getFileSystem().exists(srcFilePath)) + .describedAs("Existence of file: " + srcFilePath) + .isTrue(); + + byte[] buffer = new byte[3]; + + try (FSDataInputStream in = newFS.open(srcFilePath)) { + Assertions + .assertThat(in.read(buffer, 0, 3)) + .describedAs("Total bytes read from " + srcFilePath) + .isEqualTo(3); + Assertions + .assertThat(buffer) + .describedAs("Contents read from " + srcFilePath) + .containsExactly(1, 2, 3); + } newFS.delete(srcDir, true); From 09ff933c243e7fec806e28fbdbb990ae5762a956 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 1 Feb 2024 11:58:36 -0800 Subject: [PATCH 19/19] minor test and doc changes --- .../src/site/markdown/tools/hadoop-aws/index.md | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index b3e41a632a487..cdc0134e60a84 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -226,7 +226,7 @@ If you do any of these: change your credentials immediately! ## Connecting to Amazon S3 or a third-party store -See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md). +See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.html). Also, please check [S3 endpoint and region settings in detail](connecting.html#s3_endpoint_region_details). diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 6d246e8f85ad5..95f31d7527f86 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -397,10 +397,7 @@ private void assertOpsUsingNewFs() throws IOException { byte[] buffer = new byte[3]; try (FSDataInputStream in = newFS.open(srcFilePath)) { - Assertions - .assertThat(in.read(buffer, 0, 3)) - .describedAs("Total bytes read from " + srcFilePath) - .isEqualTo(3); + in.readFully(buffer); Assertions .assertThat(buffer) .describedAs("Contents read from " + srcFilePath) @@ -411,11 +408,11 @@ private void assertOpsUsingNewFs() throws IOException { Assertions .assertThat(newFS.exists(srcFilePath)) - .describedAs("Existence of file: " + srcFilePath) + .describedAs("Existence of file: " + srcFilePath + " using new FS") .isFalse(); Assertions .assertThat(getFileSystem().exists(srcFilePath)) - .describedAs("Existence of file: " + srcFilePath) + .describedAs("Existence of file: " + srcFilePath + " using original FS") .isFalse(); }