From 5580fa2f0d366ae6f9ba0379666f240be28dcb1e Mon Sep 17 00:00:00 2001 From: Shintaro Onuma Date: Fri, 9 Feb 2024 14:59:52 +0000 Subject: [PATCH 1/2] Parse S3 VPC endpoints for the region --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 16 +++++++++++- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 13 +++++++++- .../hadoop/fs/s3a/TestS3AEndpointParsing.java | 25 +++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 284ba8e6ae5c9..f73ca47e3a2fd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; @@ -82,6 +84,9 @@ public class DefaultS3ClientFactory extends Configured private static final String S3_SERVICE_NAME = "s3"; + private static final Pattern VPC_ENDPOINT_PATTERN = + Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$"); + /** * Subclasses refer to this. */ @@ -379,10 +384,19 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * @param endpointEndsWithCentral true if the endpoint is configured as central. * @return the S3 region, null if unable to resolve from endpoint. */ - private static Region getS3RegionFromEndpoint(final String endpoint, + @VisibleForTesting + static Region getS3RegionFromEndpoint(final String endpoint, final boolean endpointEndsWithCentral) { if (!endpointEndsWithCentral) { + // S3 VPC endpoint parsing + Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint); + if (matcher.find()) { + LOG.debug("Mapping to VPCE"); + LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1)); + return Region.of(matcher.group(1)); + } + LOG.debug("Endpoint {} is not the default; parsing", endpoint); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 95f31d7527f86..67003adddca9b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -89,6 +89,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String CN_VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.cn-northwest-1.vpce.amazonaws.com.cn"; + public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; /** @@ -271,7 +273,6 @@ public void testWithGovCloudEndpoint() throws Throwable { } @Test - @Ignore("Pending HADOOP-18938. S3A region logic to handle vpce and non standard endpoints") public void testWithVPCE() throws Throwable { describe("Test with vpc endpoint"); Configuration conf = getConfiguration(); @@ -281,6 +282,16 @@ public void testWithVPCE() throws Throwable { expectInterceptorException(client); } + @Test + public void testWithChinaVPCE() throws Throwable { + describe("Test with china vpc endpoint"); + Configuration conf = getConfiguration(); + + S3Client client = createS3Client(conf, CN_VPC_ENDPOINT, null, CN_NORTHWEST_1, false); + + expectInterceptorException(client); + } + @Test public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { describe("Access public bucket using central endpoint and region " diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java new file mode 100644 index 0000000000000..e456503ccb87e --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java @@ -0,0 +1,25 @@ +package org.apache.hadoop.fs.s3a; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import software.amazon.awssdk.regions.Region; + +public class TestS3AEndpointParsing extends AbstractS3AMockTest { + + private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String NON_VPC_ENDPOINT = "s3.eu-west-1.amazonaws.com"; + private static final String US_WEST_2 = "us-west-2"; + private static final String EU_WEST_1 = "eu-west-1"; + + @Test + public void testVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(US_WEST_2)); + } + + @Test + public void testNonVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(NON_VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(EU_WEST_1)); + } +} From 97360ba71f24df4cfc2d44f2f05c1bee0129a968 Mon Sep 17 00:00:00 2001 From: Shintaro Onuma Date: Fri, 9 Feb 2024 17:45:51 +0000 Subject: [PATCH 2/2] Add licence --- .../hadoop/fs/s3a/TestS3AEndpointParsing.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java index e456503ccb87e..8a77c102ac67d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs.s3a; import org.assertj.core.api.Assertions;