From 6cebf7a6ca51946129f03750e22ced047346349f Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 25 Mar 2020 17:20:12 +0000 Subject: [PATCH] HADOOP-16939 fs.s3a.authoritative.path should support multiple FS URIs add unit test, new ITest and then fix the issue: different schema, bucket == skip factored out the underlying logic for unit testing; also moved maybeAddTrailingSlash to S3AUtils (while retaining/forwarnding existing method in S3AFS). tested: london, sole failure is testListingDelete[auth=true](org.apache.hadoop.fs.s3a.ITestS3GuardOutOfBandOperations) filed HADOOP-16853 Change-Id: I4b8d0024469551eda0ec70b4968cba4abed405ed --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 6 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 15 ++ .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 37 ++++- .../hadoop/fs/s3a/ITestAuthoritativePath.java | 22 +++ .../fs/s3a/s3guard/TestAuthoritativePath.java | 131 ++++++++++++++++++ 5 files changed, 203 insertions(+), 8 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestAuthoritativePath.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f083278741322..9bd33a48df1b7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -974,11 +974,7 @@ public String pathToKey(Path path) { */ @InterfaceAudience.Private public String maybeAddTrailingSlash(String key) { - if (!key.isEmpty() && !key.endsWith("/")) { - return key + '/'; - } else { - return key; - } + return S3AUtils.maybeAddTrailingSlash(key); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 3775848fc8daa..1d399505f5823 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -1726,6 +1726,21 @@ public static String getBucketOption(Configuration conf, String bucket, return conf.get(FS_S3A_BUCKET_PREFIX + bucket + '.' + baseKey); } + /** + * Turns a path (relative or otherwise) into an S3 key, adding a trailing + * "/" if the path is not the root and does not already have a "/" + * at the end. + * + * @param key s3 key or "" + * @return the with a trailing "/", or, if it is the root key, "", + */ + public static String maybeAddTrailingSlash(String key) { + if (!key.isEmpty() && !key.endsWith("/")) { + return key + '/'; + } else { + return key; + } + } /** * Path filter which ignores any file which starts with . or _. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index fa1b1dcb5a309..877dc58612b65 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -30,6 +30,7 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -974,13 +975,43 @@ public static DirListingMetadata listChildrenWithTtl(MetadataStore ms, } public static Collection getAuthoritativePaths(S3AFileSystem fs) { + return getAuthoritativePaths( + fs.getUri(), + fs.getConf(), + p -> fs.maybeAddTrailingSlash(fs.qualify(p).toString())); + } + + /** + * Get the authoritative paths of a filesystem. + * + * @param uri FS URI + * @param conf configuration + * @param qualifyToDir a qualification operation + * @return list of URIs valid for this FS. + */ + @VisibleForTesting + static Collection getAuthoritativePaths( + final URI uri, + final Configuration conf, + final Function qualifyToDir) { String[] rawAuthoritativePaths = - fs.getConf().getTrimmedStrings(AUTHORITATIVE_PATH, DEFAULT_AUTHORITATIVE_PATH); + conf.getTrimmedStrings(AUTHORITATIVE_PATH, DEFAULT_AUTHORITATIVE_PATH); Collection authoritativePaths = new ArrayList<>(); if (rawAuthoritativePaths.length > 0) { for (int i = 0; i < rawAuthoritativePaths.length; i++) { - Path qualified = fs.qualify(new Path(rawAuthoritativePaths[i])); - authoritativePaths.add(fs.maybeAddTrailingSlash(qualified.toString())); + Path path = new Path(rawAuthoritativePaths[i]); + URI pathURI = path.toUri(); + if (pathURI.getAuthority() != null && + !pathURI.getAuthority().equals(uri.getAuthority())) { + // skip on auth + continue; + } + if (pathURI.getScheme() != null && + !pathURI.getScheme().equals(uri.getScheme())) { + // skip on auth + continue; + } + authoritativePaths.add(qualifyToDir.apply(path)); } } return authoritativePaths; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java index eb54c0ee0e7ac..0a91102bf5aa6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java @@ -233,6 +233,28 @@ public void testSingleAuthPath() throws Exception { } } + @Test + public void testAuthPathWithOtherBucket() throws Exception { + Path authPath; + Path nonAuthPath; + S3AFileSystem fs = null; + String landsat = "s3a://landsat-pds/data"; + String decoy2 = "/decoy2"; + + try { + authPath = new Path(testRoot, "testMultiAuthPath-first"); + nonAuthPath = new Path(testRoot, "nonAuth-1"); + fs = createMultiPathAuthFS(authPath.toString(), landsat, decoy2); + assertTrue("No S3Guard store for partially authoritative FS", + fs.hasMetadataStore()); + + runTestInsidePath(fs, authPath); + runTestOutsidePath(fs, nonAuthPath); + } finally { + cleanUpFS(fs); + } + } + @Test public void testMultiAuthPath() throws Exception { Path authPath; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestAuthoritativePath.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestAuthoritativePath.java new file mode 100644 index 0000000000000..c8e56f753bd50 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestAuthoritativePath.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.net.URI; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit tests of auth path resolution. + */ +public class TestAuthoritativePath extends AbstractHadoopTestBase { + + private final Path root = new Path("/"); + + private URI fsUri; + + private static final String BASE = "s3a://bucket"; + + @Before + public void setup() throws Exception { + fsUri = new URI(BASE +"/"); + } + + private Configuration authPathsConf(String... paths) { + Configuration conf = new Configuration(false); + conf.set(AUTHORITATIVE_PATH, String.join(",", paths)); + return conf; + } + + @Test + public void testResolution() throws Throwable { + assertAuthPaths(l("/one"), "/one/"); + } + + @Test + public void testResolutionWithFQP() throws Throwable { + assertAuthPaths(l("/one/", + BASE + "/two/"), + "/one/", "/two/"); + } + @Test + public void testOtherBucket() throws Throwable { + assertAuthPaths(l("/one/", + "s3a://landsat-pds/", + BASE + "/two/"), + "/one/", "/two/"); + } + + @Test + public void testOtherScheme() throws Throwable { + assertAuthPaths(l("/one/", + "s3a://landsat-pds/", + "http://bucket/two/"), + "/one/"); + } + + /** + * Get the auth paths; qualification is through + * Path.makeQualified not the FS near-equivalent. + * @param conf configuration + * @return list of auth paths. + */ + private Collection getAuthoritativePaths( + Configuration conf) { + + return S3Guard.getAuthoritativePaths(fsUri, conf, + p -> { + Path q = p.makeQualified(fsUri, root); + assertThat(q.toUri().getAuthority()) + .describedAs("Path %s", q) + .isEqualTo(fsUri.getAuthority()); + return S3AUtils.maybeAddTrailingSlash(q.toString()); + }); + } + + /** + * take a varargs list and and return as an array. + * @param s source + * @return the values + */ + private String[] l(String...s) { + return s; + } + + /** + * Assert that the authoritative paths from a source list + * are that expected. + * @param src source entries to set as auth paths + * @param expected the list of auth paths for a filesystem + */ + private void assertAuthPaths(String[] src, String...expected) { + Configuration conf = authPathsConf(src); + List collect = Arrays.stream(expected) + .map(s -> BASE + s) + .collect(Collectors.toList()); + Collection paths = getAuthoritativePaths(conf); + assertThat(paths) + .containsExactlyInAnyOrderElementsOf(collect); + } + +}