From 77dd1038aad9cfea93815e339503a5b07c4d1e87 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Tue, 18 Jan 2022 16:42:25 +0000 Subject: [PATCH 1/2] HADOOP-18068. S3 SDK Upgrade causes AccessPoint ARN endpoint mistranslation [ADDENDUM] Since upgrading the SDK to 1.12.132 the access point endpoint translation was broken. Correct endpoints should start with "s3-accesspoint.", after SDK upgrade they start with "s3.accesspoint-" which messes up tests + region detection by the SDK. - Fixed endpoint translation with a .replace, if SDK will fix the bug then this replace will remain harmless; - Fixed the TestArnResource tests; --- .../src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java | 5 ++++- .../test/java/org/apache/hadoop/fs/s3a/TestArnResource.java | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 7c866ac96765d..92f5e57ece094 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -107,7 +107,10 @@ public String getFullArn() { */ public String getEndpoint() { return RegionUtils.getRegion(accessPointRegionKey) - .getServiceEndpoint("s3"); + .getServiceEndpoint("s3") + // There's a slight issue with getServiceEndpoint which breaks an endpoint related to + // access points, i.e. the correct one starts with "s3-accesspoint." + .replace("s3.accesspoint-", "s3-accesspoint."); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 5ac47752ec826..a37afd19895b6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -46,10 +46,10 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String accessPoint = "testAp"; String accountId = "123456789101"; String[][] regionPartitionEndpoints = new String[][] { - {Regions.EU_WEST_1.getName(), "aws", "eu-west-1.amazonaws.com"}, + {Regions.EU_WEST_1.getName(), "aws", "s3-accesspoint.eu-west-1.amazonaws.com"}, {Regions.US_GOV_EAST_1.getName(), "aws-us-gov", - "us-gov-east-1.amazonaws.com"}, - {Regions.CN_NORTH_1.getName(), "aws-cn", "cn-north-1.amazonaws.com"}, + "s3-accesspoint.us-gov-east-1.amazonaws.com"}, + {Regions.CN_NORTH_1.getName(), "aws-cn", "s3-accesspoint.cn-north-1.amazonaws.com"}, }; for (String[] testPair : regionPartitionEndpoints) { From 2f5901c9fa63ff56af6e7af89bc5283e289d8a9e Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 28 Jan 2022 16:58:13 +0000 Subject: [PATCH 2/2] Removing reliance on SDK Turns out last SDK upgrade broke something it shouldn't and proved how brittle some parts of this code base are. One thing that's not brittle is the endpoint we make requests to. Endpoint seems to be quite easy to generate, thus we're moving away from using the SDK in a "nice" way and we're doing it for ourselves, since it's easier to understand what's happening (and it's 1 line of code). --- .../org/apache/hadoop/fs/s3a/ArnResource.java | 8 +-- .../apache/hadoop/fs/s3a/TestArnResource.java | 53 +++++++++++++------ 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 92f5e57ece094..0294f7722905d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -21,12 +21,12 @@ import javax.annotation.Nonnull; import com.amazonaws.arn.Arn; -import com.amazonaws.regions.RegionUtils; /** * Represents an Arn Resource, this can be an accesspoint or bucket. */ public final class ArnResource { + private final static String ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; /** * Resource name. @@ -106,11 +106,7 @@ public String getFullArn() { * @return resource endpoint. */ public String getEndpoint() { - return RegionUtils.getRegion(accessPointRegionKey) - .getServiceEndpoint("s3") - // There's a slight issue with getServiceEndpoint which breaks an endpoint related to - // access points, i.e. the correct one starts with "s3-accesspoint." - .replace("s3.accesspoint-", "s3-accesspoint."); + return String.format(ACCESSPOINT_ENDPOINT_FORMAT, region); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index a37afd19895b6..36381bf14b169 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -39,39 +39,43 @@ public class TestArnResource extends HadoopTestBase { private final static Logger LOG = LoggerFactory.getLogger(TestArnResource.class); + private final static String MOCK_ACCOUNT = "123456789101"; + @Test public void parseAccessPointFromArn() throws IllegalArgumentException { describe("Parse AccessPoint ArnResource from arn string"); String accessPoint = "testAp"; - String accountId = "123456789101"; String[][] regionPartitionEndpoints = new String[][] { - {Regions.EU_WEST_1.getName(), "aws", "s3-accesspoint.eu-west-1.amazonaws.com"}, - {Regions.US_GOV_EAST_1.getName(), "aws-us-gov", - "s3-accesspoint.us-gov-east-1.amazonaws.com"}, - {Regions.CN_NORTH_1.getName(), "aws-cn", "s3-accesspoint.cn-north-1.amazonaws.com"}, + {Regions.EU_WEST_1.getName(), "aws"}, + {Regions.US_GOV_EAST_1.getName(), "aws-us-gov"}, + {Regions.CN_NORTH_1.getName(), "aws-cn"}, }; for (String[] testPair : regionPartitionEndpoints) { String region = testPair[0]; String partition = testPair[1]; - String endpoint = testPair[2]; - - // arn:partition:service:region:account-id:resource-type/resource-id - String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId, - accessPoint); - ArnResource resource = ArnResource.accessPointFromArn(arn); - assertEquals("Arn does not match", arn, resource.getFullArn()); + ArnResource resource = getArnResourceFrom(partition, region, MOCK_ACCOUNT, accessPoint); assertEquals("Access Point name does not match", accessPoint, resource.getName()); - assertEquals("Account Id does not match", accountId, resource.getOwnerAccountId()); + assertEquals("Account Id does not match", MOCK_ACCOUNT, resource.getOwnerAccountId()); assertEquals("Region does not match", region, resource.getRegion()); - Assertions.assertThat(resource.getEndpoint()) - .describedAs("Endpoint does not match") - .contains(endpoint); } } + @Test + public void makeSureEndpointHasTheCorrectFormat() { + // Access point (AP) endpoints are different from S3 bucket endpoints, thus when using APs the + // endpoints for the client are modified. This test makes sure endpoint is set up correctly. + ArnResource accessPoint = getArnResourceFrom("aws", "eu-west-1", MOCK_ACCOUNT, + "test"); + String expected = "s3-accesspoint.eu-west-1.amazonaws.com"; + + Assertions.assertThat(accessPoint.getEndpoint()) + .describedAs("Endpoint has invalid format. Access Point requests will not work") + .isEqualTo(expected); + } + @Test public void invalidARNsMustThrow() throws Exception { describe("Using an invalid ARN format must throw when initializing an ArnResource."); @@ -80,6 +84,23 @@ public void invalidARNsMustThrow() throws Exception { ArnResource.accessPointFromArn("invalid:arn:resource")); } + /** + * Create an {@link ArnResource} from string components + * @param partition - partition for ARN + * @param region - region for ARN + * @param accountId - accountId for ARN + * @param resourceName - ARN resource name + * @return ArnResource described by its properties + */ + private ArnResource getArnResourceFrom(String partition, String region, String accountId, + String resourceName) { + // arn:partition:service:region:account-id:resource-type/resource-id + String arn = String.format("arn:%s:s3:%s:%s:accesspoint/%s", partition, region, accountId, + resourceName); + + return ArnResource.accessPointFromArn(arn); + } + private void describe(String message) { LOG.info(message); }