From 2139a5aff32938fcf6295d72b4820b18f4624f81 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 24 Jul 2019 23:53:37 +0100 Subject: [PATCH 01/15] HADOOP-13373. Add S3A implementation of FSMainOperationsBaseTest. This adds the ITestS3AFSMainOperations test so that we have some globber tests against the S3A FS. They didn't find problems there, only a bug in setWorkingDir(). Change-Id: Idbdc4b049fa70fa31aa701b2e1c570c5bd208521 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../fs/s3a/ITestS3AFSMainOperations.java | 65 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 159505b055bd5..cb05de1ad1d82 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2472,7 +2472,7 @@ private S3ListRequest createListObjectsRequest(String key, * @param newDir the current working directory. */ public void setWorkingDirectory(Path newDir) { - workingDir = newDir; + workingDir = makeQualified(newDir); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java new file mode 100644 index 0000000000000..6552150ac04d3 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.junit.Ignore; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSMainOperationsBaseTest; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.s3a.S3AContract; + +import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestPath; + +/** + * S3A Test suite for the FSMainOperationsBaseTest tests. 
+ */ +public class ITestS3AFSMainOperations extends FSMainOperationsBaseTest { + + + public ITestS3AFSMainOperations() { + super(createTestPath( + new Path("/ITestS3AFSMainOperations")).toUri().toString()); + } + + @Override + protected FileSystem createFileSystem() throws Exception { + S3AContract contract = new S3AContract(new Configuration()); + contract.init(); + return contract.getTestFileSystem(); + } + + @Override + @Ignore("Permissions not supported") + public void testListStatusThrowsExceptionForUnreadableDir() { + } + + @Override + @Ignore("Permissions not supported") + public void testGlobStatusThrowsExceptionForUnreadableDir() { + } + + @Override + @Ignore("local FS path setup broken") + public void testCopyToLocalWithUseRawLocalFileSystemOption() throws + Exception { + } + +} From 34a25ab9eb09fdbb94c3290ba024cb7f32fda8d2 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 25 Jul 2019 14:18:28 +0100 Subject: [PATCH 02/15] HADOOP-16458: located file status scan failing on S3A Improving diagnostics of scanning * stack traces don't get lost as failures get wrapped/aggregated * debug level logging of what is up in Globber * Preparation for a test of LocatedFileStatus in S3A, though I've got some better ideas there (i.e. make it a scale test) Change-Id: Ie85eef17a52314d35c95defa4105d72b8c84dd20 --- .../java/org/apache/hadoop/fs/Globber.java | 10 +++++ .../apache/hadoop/mapred/FileInputFormat.java | 5 ++- .../hadoop/mapred/InvalidInputException.java | 4 ++ .../mapred/LocatedFileStatusFetcher.java | 15 +++++-- .../mapreduce/lib/input/FileInputFormat.java | 6 ++- .../lib/input/InvalidInputException.java | 4 ++ .../fs/s3a/ITestLocatedFileStatusFetcher.java | 40 +++++++++++++++++++ 7 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index b241a949b1533..89df20ab809d4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -67,6 +67,7 @@ private FileStatus getFileStatus(Path path) throws IOException { return fc.getFileStatus(path); } } catch (FileNotFoundException e) { + LOG.debug("getFileStatus({}) failed; returning null", path, e); return null; } } @@ -79,6 +80,7 @@ private FileStatus[] listStatus(Path path) throws IOException { return fc.util().listStatus(path); } } catch (FileNotFoundException e) { + LOG.debug("listStatus({}) failed; returning empty array", path, e); return new FileStatus[0]; } } @@ -164,6 +166,7 @@ private FileStatus[] doGlob() throws IOException { String pathPatternString = pathPattern.toUri().getPath(); List flattenedPatterns = GlobExpander.expand(pathPatternString); + LOG.debug("Filesystem glob {}", pathPatternString); // Now loop over all flattened patterns. In every case, we'll be trying to // match them to entries in the filesystem. ArrayList results = @@ -175,6 +178,7 @@ private FileStatus[] doGlob() throws IOException { // path you go down influences how the path must be made absolute. Path absPattern = fixRelativePart(new Path( flatPattern.isEmpty() ? Path.CUR_DIR : flatPattern)); + LOG.debug("Pattern: {}", absPattern); // Now we break the flattened, absolute pattern into path components. 
// For example, /a/*/c would be broken into the list [a, *, c] List components = @@ -212,6 +216,7 @@ private FileStatus[] doGlob() throws IOException { if (globFilter.hasPattern()) { sawWildcard = true; } + LOG.debug("Component {}, patterned={}", component, sawWildcard); if (candidates.isEmpty() && sawWildcard) { // Optimization: if there are no more candidates left, stop examining // the path components. We can only do this if we've already seen @@ -245,6 +250,8 @@ private FileStatus[] doGlob() throws IOException { // incorrectly conclude that /a/b was a file and should not match // /a/*/*. So we use getFileStatus of the path we just listed to // disambiguate. + LOG.debug("listStatus found one entry; disambiguating {}", + children[0]); Path path = candidate.getPath(); FileStatus status = getFileStatus(path); if (status == null) { @@ -257,6 +264,7 @@ private FileStatus[] doGlob() throws IOException { continue; } if (!status.isDirectory()) { + LOG.debug("Resolved entry is a file; skipping: {}", status); continue; } } @@ -312,6 +320,8 @@ private FileStatus[] doGlob() throws IOException { */ if ((!sawWildcard) && results.isEmpty() && (flattenedPatterns.size() <= 1)) { + LOG.debug("No matches found and there was no wildcard in the path {}", + pathPattern); return null; } /* diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java index afdc0ca40002e..b3e2b4ade80fc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapred; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -250,7 +251,9 @@ protected FileStatus[] listStatus(JobConf job) throws IOException { job, dirs, recursive, inputFilter, false); locatedFiles = locatedFileStatusFetcher.getFileStatuses(); } catch (InterruptedException e) { - throw new IOException("Interrupted while getting file statuses"); + throw (IOException) + new InterruptedIOException("Interrupted while getting file statuses") + .initCause(e); } result = Iterables.toArray(locatedFiles, FileStatus.class); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java index e1bb36bcdd60c..faf1a3877c16c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java @@ -38,10 +38,14 @@ public class InvalidInputException extends IOException { /** * Create the exception with the given list. + * The first element of the list is used as the init cause value. * @param probs the list of problems to report. this list is not copied. 
*/ public InvalidInputException(List probs) { problems = probs; + if (!probs.isEmpty()) { + initCause(probs.get(0)); + } } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java index 3869c493a06a8..5766245c3090a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java @@ -51,6 +51,9 @@ /** * Utility class to fetch block locations for specified Input paths using a * configured number of threads. + * The thread count is determined from the value of + * "mapreduce.input.fileinputformat.list-status.num-threads" in the + * configuration. */ @Private public class LocatedFileStatusFetcher { @@ -106,10 +109,14 @@ public LocatedFileStatusFetcher(Configuration conf, Path[] dirs, } /** - * Start executing and return FileStatuses based on the parameters specified + * Start executing and return FileStatuses based on the parameters specified. + * * @return fetched file statuses - * @throws InterruptedException - * @throws IOException + * @throws InterruptedException interruption waiting for results. + * @throws IOException IO failure or other error. + * @throws InvalidInputException on an invalid input and the old API + * @throws org.apache.hadoop.mapreduce.lib.input.InvalidInputException on an + * invalid input and the new API. */ public Iterable getFileStatuses() throws InterruptedException, IOException { @@ -285,7 +292,7 @@ public void onFailure(Throwable t) { } } - + /** * Processes an initial Input Path pattern through the globber and PathFilter * to generate a list of files which need further processing. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java index e2658caabe57c..22efe1471f91f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapreduce.lib.input; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.List; @@ -283,7 +284,10 @@ protected List listStatus(JobContext job job.getConfiguration(), dirs, recursive, inputFilter, true); locatedFiles = locatedFileStatusFetcher.getFileStatuses(); } catch (InterruptedException e) { - throw new IOException("Interrupted while getting file statuses"); + throw (IOException) + new InterruptedIOException( + "Interrupted while getting file statuses") + .initCause(e); } result = Lists.newArrayList(locatedFiles); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java index 61e14841de706..1113bec18889a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java @@ -37,10 +37,14 @@ public class InvalidInputException extends IOException { /** * Create the exception with the given list. + * The first element of the list is used as the init cause value. * @param probs the list of problems to report. this list is not copied. */ public InvalidInputException(List probs) { problems = probs; + if (!probs.isEmpty()) { + initCause(probs.get(0)); + } } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java new file mode 100644 index 0000000000000..bd6bf2f6cdbc3 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test the LocatedFileStatusFetcher can do. + * This is related to HADOOP-16458. + * There's basic tests in ITestS3AFSMainOperations; this + * is see if we can create better corner cases. + */ +public class ITestLocatedFileStatusFetcher extends AbstractS3ATestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestLocatedFileStatusFetcher.class); + + @Test + public void testGlobScan() throws Throwable { + + } +} From 241fa4b71866701f0323271bbcac04c5aaee142e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 26 Jul 2019 21:24:12 +0100 Subject: [PATCH 03/15] HADOOP-16458: Globber to support a no-symlink-FS option this eliminates HEAD requests on scans where candidates are files Change-Id: I56971977e28a3adf09cae3df1c3d6e634cbc7295 --- .../java/org/apache/hadoop/fs/Globber.java | 57 +++++++++++++------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 6 +- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 89df20ab809d4..7a60a9c2cbd65 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -33,7 +33,7 @@ @InterfaceAudience.Private @InterfaceStability.Unstable -class Globber { +public class Globber { public static final Logger LOG = LoggerFactory.getLogger(Globber.class.getName()); @@ -42,6 +42,7 @@ class Globber { private final Path pathPattern; private final PathFilter filter; private final Tracer tracer; + private final boolean resolveSymlinks; public Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.fs = fs; @@ -49,6 +50,7 @@ public Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.pathPattern = pathPattern; this.filter = filter; this.tracer = FsTracer.get(fs.getConf()); + this.resolveSymlinks = true; } public Globber(FileContext fc, Path pathPattern, PathFilter filter) { @@ -57,6 +59,17 @@ public Globber(FileContext fc, Path pathPattern, PathFilter filter) { this.pathPattern = pathPattern; this.filter = filter; this.tracer = fc.getTracer(); + this.resolveSymlinks = true; + } + + public Globber(FileSystem fs, Path pathPattern, PathFilter filter, + boolean resolveSymlinks) { + this.fs = fs; + this.fc = null; + this.pathPattern = pathPattern; + this.filter = filter; + this.tracer = FsTracer.get(fs.getConf()); + this.resolveSymlinks = resolveSymlinks; } private FileStatus getFileStatus(Path path) throws IOException { @@ -250,22 +263,32 @@ private FileStatus[] doGlob() throws IOException { // incorrectly conclude that /a/b was a file and should not match // /a/*/*. So we use getFileStatus of the path we just listed to // disambiguate. - LOG.debug("listStatus found one entry; disambiguating {}", - children[0]); - Path path = candidate.getPath(); - FileStatus status = getFileStatus(path); - if (status == null) { - // null means the file was not found - LOG.warn("File/directory {} not found:" - + " it may have been deleted." 
- + " If this is an object store, this can be a sign of" - + " eventual consistency problems.", - path); - continue; - } - if (!status.isDirectory()) { - LOG.debug("Resolved entry is a file; skipping: {}", status); - continue; + if (resolveSymlinks) { + LOG.debug("listStatus found one entry; disambiguating {}", + children[0]); + Path path = candidate.getPath(); + FileStatus status = getFileStatus(path); + if (status == null) { + // null means the file was not found + LOG.warn("File/directory {} not found:" + + " it may have been deleted." + + " If this is an object store, this can be a sign of" + + " eventual consistency problems.", + path); + continue; + } + if (!status.isDirectory()) { + LOG.debug("Resolved entry is a file; skipping: {}", status); + continue; + } + } else { + // there's no symlinks in this store, so no need to issue + // another call, just see if the result is a directory or a file + if (children[0].getPath().equals(candidate.getPath())) { + // the listing status is of a file + continue; + } + } } for (FileStatus child : children) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index cb05de1ad1d82..472eac2a57ab4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -94,6 +94,7 @@ import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Globber; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; import org.apache.hadoop.fs.s3a.impl.CopyOutcome; @@ -3669,8 +3670,7 @@ public boolean isMagicCommitPath(Path path) { */ @Override public FileStatus[] globStatus(Path pathPattern) throws IOException { - entryPoint(INVOCATION_GLOB_STATUS); - return super.globStatus(pathPattern); + return globStatus(pathPattern, ACCEPT_ALL); } /** @@ -3681,7 +3681,7 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException { public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException { entryPoint(INVOCATION_GLOB_STATUS); - return super.globStatus(pathPattern, filter); + return new Globber(this, pathPattern, filter, false).glob(); } /** From 0b282888e37e4eade089e26a5c56a07bba90bdc0 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 26 Jul 2019 21:28:22 +0100 Subject: [PATCH 04/15] HADOOP-16458 test to explore listFiles/listStatus/globStatus calls on no-read paths key point: LIST always works, but HEAD and GET fail; in auth mode many such failures get skipped. 
Change-Id: I66fc17021d5f2b4015c9afbb446df6cb4df0b9ce --- .../s3a/auth/ITestRestrictedReadAccess.java | 321 ++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java new file mode 100644 index 0000000000000..b6bd87fe0f205 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.AccessDeniedException; +import java.util.Arrays; +import java.util.Collection; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_ARN; +import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE; +import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.lsR; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.auth.RoleModel.Effects; +import static org.apache.hadoop.fs.s3a.auth.RoleModel.Statement; +import static org.apache.hadoop.fs.s3a.auth.RoleModel.directory; 
+import static org.apache.hadoop.fs.s3a.auth.RoleModel.statement; +import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; +import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.bindRolePolicyStatements; +import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.newAssumedRoleConfig; +import static org.apache.hadoop.test.GenericTestUtils.failif; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * This test creates a client with no read access to the underlying + * filesystem and then tries to perform various read operations on it. + * S3Guard in auth mode always goes to the FS, so we parameterize the + * test for S3Guard + Auth to see how failures move around. + *
+ * <ol>
+ *   <li>Tests only run if an assumed role is provided.</li>
+ *   <li>And the s3guard tests use the local metastore if
+ *   there was not one already.</li>
+ * </ol>
+ * The tests are all bundled into one big file to reduce setup costs + * on a parameterized test run. + */ +@RunWith(Parameterized.class) +public class ITestRestrictedReadAccess extends AbstractS3ATestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestRestrictedReadAccess.class); + + @Parameterized.Parameters(name = "{0}") + public static Collection params() { + return Arrays.asList(new Object[][]{ + {"raw", false, false}, + {"nonauth", true, false}, + {"auth", true, true} + }); + } + + private static final Path ROOT = new Path("/"); + + private static final Statement STATEMENT_ALL_BUCKET_READ_ACCESS + = statement(true, S3_ALL_BUCKETS, S3_BUCKET_READ_OPERATIONS); + + private final String name; + + private final boolean s3guard; + + private final boolean authMode; + + /** + * A role FS; if non-null it is closed in teardown. + */ + private S3AFileSystem roleFS; + + public ITestRestrictedReadAccess( + final String name, + final boolean s3guard, + final boolean authMode) { + this.name = name; + this.s3guard = s3guard; + this.authMode = authMode; + } + + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + String bucketName = getTestBucketName(conf); + removeBucketOverrides(bucketName, conf, + S3_METADATA_STORE_IMPL, + METADATASTORE_AUTHORITATIVE); + conf.setClass(Constants.S3_METADATA_STORE_IMPL, + s3guard ? + LocalMetadataStore.class + : NullMetadataStore.class + , MetadataStore.class); + conf.setBoolean(METADATASTORE_AUTHORITATIVE, authMode); + disableFilesystemCaching(conf); + return conf; + } + + @Override + public void setup() throws Exception { + super.setup(); + assumeRoleTests(); + } + + public Path methodPath() throws IOException { + return path(getMethodName()); + } + + @Override + public void teardown() throws Exception { + S3AUtils.closeAll(LOG, roleFS); + super.teardown(); + } + + private void assumeRoleTests() { + assume("No ARN for role tests", !getAssumedRoleARN().isEmpty()); + } + + private String getAssumedRoleARN() { + return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, ""); + } + + /** + * Create the assumed role configuration. + * @return a config bonded to the ARN of the assumed role + */ + public Configuration createAssumedRoleConfig() { + return createAssumedRoleConfig(getAssumedRoleARN()); + } + + /** + * Create a config for an assumed role; it also disables FS caching. 
+ * @param roleARN ARN of role + * @return the new configuration + */ + private Configuration createAssumedRoleConfig(String roleARN) { + return newAssumedRoleConfig(getContract().getConf(), roleARN); + } + + + @Test + public void testNoReadAccess() throws Throwable { + describe("Test failure handling of if the client doesn't" + + " have read access under a path"); + S3AFileSystem realFS = getFileSystem(); + + // avoiding the parameterization to steer clear of accidentally creating + // patterns + Path basePath = path("testNoReadAccess-" + name); + Path noReadDir = new Path(basePath, "noReadDir"); + Path emptyDir = new Path(noReadDir, "emptyDir"); + realFS.mkdirs(emptyDir); + Path emptyFile = new Path(noReadDir, "emptyFile.txt"); + touch(realFS, emptyFile); + Path subDir = new Path(noReadDir, "subDir"); + + Path noReadFile = new Path(subDir, "noReadFile.txt"); + byte[] data = "hello".getBytes(Charset.forName("UTF-8")); + createFile(realFS, noReadFile, true, data); + + // create a role filesystem which does not have read access under a path + Configuration roleConfig = createAssumedRoleConfig(); + bindRolePolicyStatements(roleConfig, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW, + statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), + new Statement(Effects.Deny) + .addActions(S3_PATH_RW_OPERATIONS) + .addResources(directory(noReadDir)) + ); + roleFS = (S3AFileSystem) basePath.getFileSystem(roleConfig); + + // this is a LIST call; there's no marker. + roleFS.listStatus(basePath); + + // this is HEAD + "/" on S3; get on S3Guard auth + roleFS.listStatus(emptyDir); + + lsR(roleFS, noReadDir, true); + + roleFS.getFileStatus(noReadDir); + roleFS.getFileStatus(emptyDir); + if (authMode) { + roleFS.getFileStatus(noReadFile); + } else { + intercept(AccessDeniedException.class, () -> + roleFS.getFileStatus(noReadFile)); + } + intercept(AccessDeniedException.class, () -> + ContractTestUtils.readUTF8(roleFS, noReadFile, data.length)); + if (authMode) { + try (FSDataInputStream is = roleFS.open(emptyFile)) { + Assertions.assertThat(is.read()) + .describedAs("read of empty file") + .isEqualTo(-1); + } + roleFS.getFileStatus(noReadFile); + } else { + intercept(AccessDeniedException.class, () -> + ContractTestUtils.readUTF8(roleFS, emptyFile, 0)); + } + + globFS(realFS, noReadFile, true, true, false); + // a file fails if not in auth mode + globFS(roleFS, noReadFile, true, true, !authMode); + // empty directories don't fail. + assertStatusPathEquals(emptyDir, + globFS(roleFS, emptyDir, true, true, false)); + assertStatusPathEquals(noReadFile, + globFS(roleFS, + new Path(noReadDir, "*/*.txt"), + true, true, false)); + // now run a located file status fetcher against it + } + + protected void assertStatusPathEquals(final Path expected, + final FileStatus[] statuses) { + Assertions.assertThat(statuses) + .hasSize(1); + Assertions.assertThat(statuses[0].getPath()) + .isEqualTo(expected); + } + + /** + * Glob. + * @param fs filesystem to use + * @param path path (which can include patterns) + * @param nonNull must the result be non-null + * @param notEmpty must the result be a non-empty array + * @param expectAuthFailure is auth failure expected? + * @return the result of a successful glob or null if an expected auth + * failure was caught. + * @throws IOException failure. 
+ */ + protected FileStatus[] globFS( + final S3AFileSystem fs, + final Path path, + final boolean nonNull, + final boolean notEmpty, + boolean expectAuthFailure) + throws IOException { + LOG.info("Glob {}", path); + S3ATestUtils.MetricDiff getMetric = new S3ATestUtils.MetricDiff(fs, + Statistic.OBJECT_METADATA_REQUESTS); + S3ATestUtils.MetricDiff listMetric = new S3ATestUtils.MetricDiff(fs, + Statistic.OBJECT_LIST_REQUESTS); + FileStatus[] st = null; + try { + st = fs.globStatus(path); + LOG.info("Metrics:\n {},\n {}", getMetric, listMetric); + if (expectAuthFailure) { + // should have failed here + String resultStr; + if (st == null) { + resultStr = "A null array"; + } else { + resultStr = StringUtils.join(st, ","); + } + fail(String.format("globStatus(%s) should have raised" + + " an exception, but returned %s", path, resultStr)); + } + } catch (AccessDeniedException e) { + LOG.info("Metrics:\n {},\n {}", getMetric, listMetric); + failif(!expectAuthFailure, "Access denied in glob of " + path, + e); + return null; + } + if (nonNull) { + Assertions.assertThat(st) + .describedAs("Glob of %s", path) + .isNotNull(); + } + if (notEmpty) { + Assertions.assertThat(st) + .describedAs("Glob of %s", path) + .isNotEmpty(); + } + return st; + } +} From 1b56a1b121dc08ea0c51e7f985cb0c088e5f6bc9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Sat, 27 Jul 2019 00:48:52 +0100 Subject: [PATCH 05/15] HADOOP-16458: LocatedFileStatusFetcher tests against a restricted S3A Store Lots of tests (bad: all same test case; will fix), to invoke LocatedFileStatusFetcher against various S3 paths Key summary: you get the error seen in the HADOOP-16458 JIRA if * the path contains no pattern and does not exist * the path contains no pattern, exists but doesn't match the supplied pattern filter. 
Change-Id: Ib66f316a526982136e1345d10e9750aaec48cb95 --- .../mapred/LocatedFileStatusFetcher.java | 2 +- .../s3a/auth/ITestRestrictedReadAccess.java | 214 ++++++++++++++---- 2 files changed, 177 insertions(+), 39 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java index 5766245c3090a..20849d8ff0178 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java @@ -84,7 +84,7 @@ public class LocatedFileStatusFetcher { /** * @param conf configuration for the job * @param dirs the initial list of paths - * @param recursive whether to traverse the patchs recursively + * @param recursive whether to traverse the paths recursively * @param inputFilter inputFilter to apply to the resulting paths * @param newApi whether using the mapred or mapreduce API * @throws InterruptedException diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index b6bd87fe0f205..f9823cc13b7f9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; import org.apache.hadoop.fs.s3a.Constants; @@ -46,6 +47,8 @@ import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; +import org.apache.hadoop.mapred.LocatedFileStatusFetcher; +import org.apache.hadoop.mapreduce.lib.input.InvalidInputException; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; @@ -64,6 +67,7 @@ import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.bindRolePolicyStatements; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.newAssumedRoleConfig; +import static org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS; import static org.apache.hadoop.test.GenericTestUtils.failif; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -86,6 +90,21 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(ITestRestrictedReadAccess.class); + private static final PathFilter ANYTHING = t -> true; + + private static final PathFilter TEXT_FILE = + path -> path.toUri().toString().endsWith(".txt"); + + /** + * Text found in LocatedFileStatusFetcher exception. 
+ */ + private static final String DOES_NOT_EXIST = "does not exist"; + + /** + * Text found in LocatedFileStatusFetcher exception. + */ + private static final String MATCHES_0_FILES = "matches 0 files"; + @Parameterized.Parameters(name = "{0}") public static Collection params() { return Arrays.asList(new Object[][]{ @@ -95,11 +114,6 @@ public static Collection params() { }); } - private static final Path ROOT = new Path("/"); - - private static final Statement STATEMENT_ALL_BUCKET_READ_ACCESS - = statement(true, S3_ALL_BUCKETS, S3_BUCKET_READ_OPERATIONS); - private final String name; private final boolean s3guard; @@ -130,8 +144,8 @@ public Configuration createConfiguration() { conf.setClass(Constants.S3_METADATA_STORE_IMPL, s3guard ? LocalMetadataStore.class - : NullMetadataStore.class - , MetadataStore.class); + : NullMetadataStore.class, + MetadataStore.class); conf.setBoolean(METADATASTORE_AUTHORITATIVE, authMode); disableFilesystemCaching(conf); return conf; @@ -143,10 +157,6 @@ public void setup() throws Exception { assumeRoleTests(); } - public Path methodPath() throws IOException { - return path(getMethodName()); - } - @Override public void teardown() throws Exception { S3AUtils.closeAll(LOG, roleFS); @@ -195,9 +205,14 @@ public void testNoReadAccess() throws Throwable { touch(realFS, emptyFile); Path subDir = new Path(noReadDir, "subDir"); - Path noReadFile = new Path(subDir, "noReadFile.txt"); + Path subdirFile = new Path(subDir, "subdirFile.txt"); byte[] data = "hello".getBytes(Charset.forName("UTF-8")); - createFile(realFS, noReadFile, true, data); + createFile(realFS, subdirFile, true, data); + Path subDir2 = new Path(noReadDir, "subDir2"); + Path subdir2File1 = new Path(subDir2, "subdir2File1.txt"); + Path subdir2File2 = new Path(subDir2, "subdir2File2.docx"); + createFile(realFS, subdir2File1, true, data); + createFile(realFS, subdir2File2, true, data); // create a role filesystem which does not have read access under a path Configuration roleConfig = createAssumedRoleConfig(); @@ -207,8 +222,7 @@ public void testNoReadAccess() throws Throwable { statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), new Statement(Effects.Deny) .addActions(S3_PATH_RW_OPERATIONS) - .addResources(directory(noReadDir)) - ); + .addResources(directory(noReadDir))); roleFS = (S3AFileSystem) basePath.getFileSystem(roleConfig); // this is a LIST call; there's no marker. @@ -222,36 +236,151 @@ public void testNoReadAccess() throws Throwable { roleFS.getFileStatus(noReadDir); roleFS.getFileStatus(emptyDir); if (authMode) { - roleFS.getFileStatus(noReadFile); + // auth mode doesn't check S3, so no failure + roleFS.getFileStatus(subdirFile); } else { intercept(AccessDeniedException.class, () -> - roleFS.getFileStatus(noReadFile)); + roleFS.getFileStatus(subdirFile)); } intercept(AccessDeniedException.class, () -> - ContractTestUtils.readUTF8(roleFS, noReadFile, data.length)); + ContractTestUtils.readUTF8(roleFS, subdirFile, data.length)); if (authMode) { + // auth mode doesn't just not check the store, + // because it knows the file length is zero, + // it returns -1 without even opening the file. try (FSDataInputStream is = roleFS.open(emptyFile)) { Assertions.assertThat(is.read()) .describedAs("read of empty file") .isEqualTo(-1); } - roleFS.getFileStatus(noReadFile); + roleFS.getFileStatus(subdirFile); } else { + // non-auth mode, it fails at some point in the opening process. 
intercept(AccessDeniedException.class, () -> ContractTestUtils.readUTF8(roleFS, emptyFile, 0)); } - globFS(realFS, noReadFile, true, true, false); + // Fun with Glob + describe("Glob Status operations"); + // baseline: the real filesystem on a subdir + globFS(realFS, subdirFile, null, false, 1); // a file fails if not in auth mode - globFS(roleFS, noReadFile, true, true, !authMode); + globFS(roleFS, subdirFile, null, !authMode, 1); // empty directories don't fail. assertStatusPathEquals(emptyDir, - globFS(roleFS, emptyDir, true, true, false)); - assertStatusPathEquals(noReadFile, - globFS(roleFS, - new Path(noReadDir, "*/*.txt"), - true, true, false)); + globFS(roleFS, emptyDir, null, false, 1)); + + Path textFilePathPattern = new Path(noReadDir, "*/*.txt"); + FileStatus[] st = globFS(roleFS, + textFilePathPattern, + null, false, 2); + Assertions.assertThat(st) + .extracting("getPath") + .containsExactlyInAnyOrder(subdirFile, subdir2File1); + + globFS(roleFS, + new Path(noReadDir, "*/*.docx"), + null, false, 1); + globFS(roleFS, + new Path(noReadDir, "*/*.doc"), + null, false, 0); + globFS(roleFS, noReadDir, + ANYTHING, false, 1); + // now run a located file status fetcher against it + describe("LocatedFileStatusFetcher operations"); + + // use the same filter as FileInputFormat; single thread. + roleConfig.setInt(LIST_STATUS_NUM_THREADS, 1); + LocatedFileStatusFetcher fetcher = + new LocatedFileStatusFetcher( + roleConfig, + new Path[]{basePath}, + true, + HIDDEN_FILE_FILTER, + true); + Assertions.assertThat(fetcher.getFileStatuses()) + .describedAs("result of located scan") + .flatExtracting(FileStatus::getPath) + .containsExactlyInAnyOrder( + emptyFile, + subdirFile, + subdir2File1, + subdir2File2); + + describe("LocatedFileStatusFetcher with %s", textFilePathPattern); + roleConfig.setInt(LIST_STATUS_NUM_THREADS, 4); + LocatedFileStatusFetcher fetcher2 = + new LocatedFileStatusFetcher( + roleConfig, + new Path[]{textFilePathPattern}, + true, + ANYTHING, + true); + Assertions.assertThat(fetcher2.getFileStatuses()) + .describedAs("result of located scan") + .isNotNull() + .flatExtracting(FileStatus::getPath) + .containsExactlyInAnyOrder(subdirFile, subdir2File1); + + // query a file and expect the initial scan to fail except in + // auth mode. + describe("LocatedFileStatusFetcher with %s", subdirFile); + roleConfig.setInt(LIST_STATUS_NUM_THREADS, 16); + LocatedFileStatusFetcher fetcher3 = + new LocatedFileStatusFetcher( + roleConfig, + new Path[]{subdirFile}, + true, + TEXT_FILE, + true); + try { + Iterable fetched = fetcher3.getFileStatuses(); + failif(!authMode, "LocatedFileStatusFetcher(" + subdirFile + ")" + + " should have failed"); + Assertions.assertThat(fetched) + .describedAs("result of located scan") + .isNotNull() + .flatExtracting(FileStatus::getPath) + .containsExactly(subdirFile); + } catch (AccessDeniedException e) { + failif(authMode, "LocatedFileStatusFetcher(" + subdirFile + ")", e); + } + + // a file that doesn't exist + Path nonexistent = new Path(noReadDir, "nonexistent"); + intercept(InvalidInputException.class, + DOES_NOT_EXIST, + () -> new LocatedFileStatusFetcher( + roleConfig, + new Path[]{nonexistent}, + true, + ANYTHING, + true) + .getFileStatuses()); + + // a file which exists but which doesn't match the pattern + intercept(InvalidInputException.class, + DOES_NOT_EXIST, + () -> new LocatedFileStatusFetcher( + roleConfig, + new Path[]{noReadDir}, + true, + TEXT_FILE, + true) + .getFileStatuses()); + + // a pattern under a nonexistent path. 
+ intercept( + InvalidInputException.class, + MATCHES_0_FILES, + () -> new LocatedFileStatusFetcher( + roleConfig, + new Path[]{new Path(nonexistent, "*.txt)")}, + true, + TEXT_FILE, + true) + .getFileStatuses()); } protected void assertStatusPathEquals(final Path expected, @@ -266,9 +395,9 @@ protected void assertStatusPathEquals(final Path expected, * Glob. * @param fs filesystem to use * @param path path (which can include patterns) - * @param nonNull must the result be non-null - * @param notEmpty must the result be a non-empty array + * @param filter optional filter * @param expectAuthFailure is auth failure expected? + * @param expectedCount expected count of results; -1 means null response * @return the result of a successful glob or null if an expected auth * failure was caught. * @throws IOException failure. @@ -276,18 +405,20 @@ protected void assertStatusPathEquals(final Path expected, protected FileStatus[] globFS( final S3AFileSystem fs, final Path path, - final boolean nonNull, - final boolean notEmpty, - boolean expectAuthFailure) + final PathFilter filter, + boolean expectAuthFailure, + final int expectedCount) throws IOException { LOG.info("Glob {}", path); S3ATestUtils.MetricDiff getMetric = new S3ATestUtils.MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS); S3ATestUtils.MetricDiff listMetric = new S3ATestUtils.MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS); - FileStatus[] st = null; + FileStatus[] st; try { - st = fs.globStatus(path); + st = filter == null + ? fs.globStatus(path) + : fs.globStatus(path, filter); LOG.info("Metrics:\n {},\n {}", getMetric, listMetric); if (expectAuthFailure) { // should have failed here @@ -306,16 +437,23 @@ protected FileStatus[] globFS( e); return null; } - if (nonNull) { + if (expectedCount < 0) { Assertions.assertThat(st) .describedAs("Glob of %s", path) - .isNotNull(); - } - if (notEmpty) { + .isNull(); + } else { Assertions.assertThat(st) .describedAs("Glob of %s", path) - .isNotEmpty(); + .isNotNull() + .hasSize(expectedCount); } return st; } + + private static final PathFilter HIDDEN_FILE_FILTER = + (p) -> { + String n = p.getName(); + return !n.startsWith("_") && !n.startsWith("."); + }; } + From f5d2f201965457fecc5fa85788c9504ad4440608 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 29 Jul 2019 19:26:07 +0100 Subject: [PATCH 06/15] HADOOP-16348: LocatedFileStatusFetcher and Globber diagnostics LocatedFileStatusFetcher * Debug level logging of every callable invoked, thread count and other useful events. * Minor IDE-related cleanups as this is the first time this file has been looked at for a while. * fixup javadocs Globber * moved to builder pattern to the new (and still marked as private) API For creating a globber. If this really delivers speedups against stores, we'd benefit from it in our own stores and externally in GCS, so ultimately it may become public. For now, just design it ready for that. 
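As a usage sketch (illustrative only; the pattern path is invented, and "withPathFiltern" is spelled exactly as this patch defines it):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Globber;
    import org.apache.hadoop.fs.Path;

    final class BuilderGlobSketch {
      static FileStatus[] glob(FileSystem fs) throws IOException {
        return Globber.createGlobber(fs)
            .withPathPattern(new Path("s3a://bucket/logs/*/part-*"))
            .withPathFiltern(path -> true)
            // a store without symlinks can skip the per-candidate
            // getFileStatus() call used to disambiguate single-entry
            // listings.
            .withResolveSymlinks(false)
            .build()
            .glob();
      }
    }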
Invoker * log duration of operations @ debug Change-Id: I87ca7008ab787a848a35457c7a72d8f47e6d558f --- .../java/org/apache/hadoop/fs/FileSystem.java | 7 +- .../java/org/apache/hadoop/fs/Globber.java | 127 +++++++++++++++++- .../apache/hadoop/test/LambdaTestUtils.java | 3 + .../mapred/LocatedFileStatusFetcher.java | 47 +++++-- .../org/apache/hadoop/fs/s3a/Invoker.java | 3 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 14 +- .../s3a/auth/ITestRestrictedReadAccess.java | 14 +- 7 files changed, 187 insertions(+), 28 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 4e9f172a4c79f..2376c051c99f9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -2064,7 +2064,12 @@ public FileStatus[] listStatus(Path[] files, PathFilter filter) * @throws IOException IO failure */ public FileStatus[] globStatus(Path pathPattern) throws IOException { - return new Globber(this, pathPattern, DEFAULT_FILTER).glob(); + return Globber.createGlobber(this) + .withPathPattern(pathPattern) + .withPathFiltern(DEFAULT_FILTER) + .withResolveSymlinks(true) + .build() + .glob(); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 7a60a9c2cbd65..038c30c200a44 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -26,11 +26,18 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.DurationInfo; import org.apache.htrace.core.TraceScope; import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Implementation of {@link FileSystem#globStatus(Path, PathFilter)}. + * This has historically been package-private; it has been opened + * up for object stores within the {@code hadoop-*} codebase. + * It could be expanded for external store implementations in future. + */ @InterfaceAudience.Private @InterfaceStability.Unstable public class Globber { @@ -44,7 +51,7 @@ public class Globber { private final Tracer tracer; private final boolean resolveSymlinks; - public Globber(FileSystem fs, Path pathPattern, PathFilter filter) { + Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.fs = fs; this.fc = null; this.pathPattern = pathPattern; @@ -53,7 +60,7 @@ public Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.resolveSymlinks = true; } - public Globber(FileContext fc, Path pathPattern, PathFilter filter) { + Globber(FileContext fc, Path pathPattern, PathFilter filter) { this.fs = null; this.fc = fc; this.pathPattern = pathPattern; @@ -62,14 +69,42 @@ public Globber(FileContext fc, Path pathPattern, PathFilter filter) { this.resolveSymlinks = true; } - public Globber(FileSystem fs, Path pathPattern, PathFilter filter, + /** + * Constructor for use by {@link GlobBuilder}. + * @param fs filesystem + * @param pathPattern path pattern + * @param filter optional filter + * @param resolveSymlinks should symlinks be resolved. 
+ */ + private Globber(FileSystem fs, Path pathPattern, PathFilter filter, boolean resolveSymlinks) { this.fs = fs; this.fc = null; this.pathPattern = pathPattern; this.filter = filter; + this.resolveSymlinks = resolveSymlinks; this.tracer = FsTracer.get(fs.getConf()); + LOG.debug("Created Globber for path={}, symlinks={}", + pathPattern, resolveSymlinks); + } + + /** + * Constructor for use by {@link GlobBuilder}. + * @param fc file context + * @param pathPattern path pattern + * @param filter optional filter + * @param resolveSymlinks should symlinks be resolved. + */ + private Globber(FileContext fc, Path pathPattern, PathFilter filter, + boolean resolveSymlinks) { + this.fs = null; + this.fc = fc; + this.pathPattern = pathPattern; + this.filter = filter; this.resolveSymlinks = resolveSymlinks; + this.tracer = FsTracer.get(fs.getConf()); + LOG.debug("Created Globber path={}, symlinks={}", + pathPattern, resolveSymlinks); } private FileStatus getFileStatus(Path path) throws IOException { @@ -160,7 +195,8 @@ private String authorityFromPath(Path path) throws IOException { public FileStatus[] glob() throws IOException { TraceScope scope = tracer.newScope("Globber#glob"); scope.addKVAnnotation("pattern", pathPattern.toUri().getPath()); - try { + try(DurationInfo ignored = new DurationInfo(LOG, false, + "glob %s", pathPattern)) { return doGlob(); } finally { scope.close(); @@ -357,4 +393,87 @@ private FileStatus[] doGlob() throws IOException { Arrays.sort(ret); return ret; } + + public static GlobBuilder createGlobber(FileSystem fs) { + return new GlobBuilder(fs); + } + + public static GlobBuilder createGlobber(FileContext fc) { + return new GlobBuilder(fc); + } + + /** + * Builder for glob instances. + */ + public static class GlobBuilder { + + private final FileSystem fs; + + private final FileContext fc; + + private Path pathPattern; + + private PathFilter filter; + + private boolean resolveSymlinks = true; + + /** + * Construct bonded to a file context. + * @param fc file context. + */ + public GlobBuilder(final FileContext fc) { + this.fs = null; + this.fc = fc; + } + + /** + * Construct bonded to a filesystem. + * @param fs file system. + */ + public GlobBuilder(final FileSystem fs) { + this.fs = fs; + this.fc = null; + } + + /** + * Set the path pattern. + * @param pattern pattern to use. + * @return the builder + */ + public GlobBuilder withPathPattern(Path pattern) { + pathPattern = pattern; + return this; + } + + /** + * Set the path filter. + * @param pathFilter filter + * @return the builder + */ + public GlobBuilder withPathFiltern(PathFilter pathFilter) { + filter = pathFilter; + return this; + } + + /** + * Set the symlink resolution policy. + * @param resolve resolution flag. + * @return the builder + */ + public GlobBuilder withResolveSymlinks(boolean resolve) { + resolveSymlinks = resolve; + return this; + } + + /** + * Build the Globber. + * @return a new instance. + */ + public Globber build() { + return fs != null + ? 
new Globber(fs, pathPattern, filter, resolveSymlinks) + : new Globber(fc, pathPattern, filter, resolveSymlinks); + } + + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java index c1b6cc4081e5c..db36154c158ac 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java @@ -575,6 +575,9 @@ private static String robustToString(Object o) { if (o == null) { return NULL_RESULT; } else { + if (o instanceof String) { + return '"' + (String)o + '"'; + } try { return o.toString(); } catch (Exception e) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java index 20849d8ff0178..471f208a14126 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java @@ -46,6 +46,9 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.util.concurrent.HadoopExecutors; /** @@ -58,6 +61,8 @@ @Private public class LocatedFileStatusFetcher { + public static final Logger LOG = + LoggerFactory.getLogger(LocatedFileStatusFetcher.class.getName()); private final Path[] inputDirs; private final PathFilter inputFilter; private final Configuration conf; @@ -67,7 +72,7 @@ public class LocatedFileStatusFetcher { private final ExecutorService rawExec; private final ListeningExecutorService exec; private final BlockingQueue> resultQueue; - private final List invalidInputErrors = new LinkedList(); + private final List invalidInputErrors = new LinkedList<>(); private final ProcessInitialInputPathCallback processInitialInputPathCallback = new ProcessInitialInputPathCallback(); @@ -82,6 +87,9 @@ public class LocatedFileStatusFetcher { private volatile Throwable unknownError; /** + * Instantiate. + * The newApi switch is only used to configure what exception is raised + * on failure of {@link #getFileStatuses()}, it does not change the algorithm. 
* @param conf configuration for the job * @param dirs the initial list of paths * @param recursive whether to traverse the paths recursively @@ -95,12 +103,14 @@ public LocatedFileStatusFetcher(Configuration conf, Path[] dirs, IOException { int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS, FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS); + LOG.debug("Instantiated LocatedFileStatusFetcher with {} threads", + numThreads); rawExec = HadoopExecutors.newFixedThreadPool( numThreads, new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("GetFileInfo #%d").build()); exec = MoreExecutors.listeningDecorator(rawExec); - resultQueue = new LinkedBlockingQueue>(); + resultQueue = new LinkedBlockingQueue<>(); this.conf = conf; this.inputDirs = dirs; this.recursive = recursive; @@ -110,7 +120,6 @@ public LocatedFileStatusFetcher(Configuration conf, Path[] dirs, /** * Start executing and return FileStatuses based on the parameters specified. - * * @return fetched file statuses * @throws InterruptedException interruption waiting for results. * @throws IOException IO failure or other error. @@ -124,6 +133,7 @@ public Iterable getFileStatuses() throws InterruptedException, // rest being scheduled does not lead to a termination. runningTasks.incrementAndGet(); for (Path p : inputDirs) { + LOG.debug("Queuing scan of directory {}", p); runningTasks.incrementAndGet(); ListenableFuture future = exec .submit(new ProcessInitialInputPathCallable(p, conf, inputFilter)); @@ -135,14 +145,20 @@ public Iterable getFileStatuses() throws InterruptedException, lock.lock(); try { + LOG.debug("Waiting scan completion"); while (runningTasks.get() != 0 && unknownError == null) { condition.await(); } } finally { lock.unlock(); } + // either the scan completed or an error was raised. + // in the case of an error shutting down the executor will interrupt all + // active threads, which can add noise to the logs. + LOG.debug("Scan complete: shutting down"); this.exec.shutdownNow(); if (this.unknownError != null) { + LOG.debug("Scan failed", this.unknownError); if (this.unknownError instanceof Error) { throw (Error) this.unknownError; } else if (this.unknownError instanceof RuntimeException) { @@ -155,7 +171,11 @@ public Iterable getFileStatuses() throws InterruptedException, throw new IOException(this.unknownError); } } - if (this.invalidInputErrors.size() != 0) { + if (!this.invalidInputErrors.isEmpty()) { + LOG.debug("Invalid Input Errors raised"); + for (IOException error : invalidInputErrors) { + LOG.debug("Error", error); + } if (this.newApi) { throw new org.apache.hadoop.mapreduce.lib.input.InvalidInputException( invalidInputErrors); @@ -168,7 +188,7 @@ public Iterable getFileStatuses() throws InterruptedException, /** * Collect misconfigured Input errors. Errors while actually reading file info - * are reported immediately + * are reported immediately. */ private void registerInvalidInputError(List errors) { synchronized (this) { @@ -178,9 +198,10 @@ private void registerInvalidInputError(List errors) { /** * Register fatal errors - example an IOException while accessing a file or a - * full exection queue + * full execution queue. 
*/ private void registerError(Throwable t) { + LOG.debug("Error", t); lock.lock(); try { if (unknownError == null) { @@ -228,7 +249,7 @@ private static class ProcessInputDirCallable implements public Result call() throws Exception { Result result = new Result(); result.fs = fs; - + LOG.debug("ProcessInputDirCallable {}", fileStatus); if (fileStatus.isDirectory()) { RemoteIterator iter = fs .listLocatedStatus(fileStatus.getPath()); @@ -249,8 +270,8 @@ public Result call() throws Exception { } private static class Result { - private List locatedFileStatuses = new LinkedList(); - private List dirsNeedingRecursiveCalls = new LinkedList(); + private List locatedFileStatuses = new LinkedList<>(); + private List dirsNeedingRecursiveCalls = new LinkedList<>(); private FileSystem fs; } } @@ -266,11 +287,12 @@ private class ProcessInputDirCallback implements @Override public void onSuccess(ProcessInputDirCallable.Result result) { try { - if (result.locatedFileStatuses.size() != 0) { + if (!result.locatedFileStatuses.isEmpty()) { resultQueue.add(result.locatedFileStatuses); } - if (result.dirsNeedingRecursiveCalls.size() != 0) { + if (!result.dirsNeedingRecursiveCalls.isEmpty()) { for (FileStatus fileStatus : result.dirsNeedingRecursiveCalls) { + LOG.debug("Queueing directory scan {}", fileStatus.getPath()); runningTasks.incrementAndGet(); ListenableFuture future = exec .submit(new ProcessInputDirCallable(result.fs, fileStatus, @@ -316,6 +338,7 @@ public Result call() throws Exception { Result result = new Result(); FileSystem fs = path.getFileSystem(conf); result.fs = fs; + LOG.debug("ProcessInitialInputPathCallable path {}", path); FileStatus[] matches = fs.globStatus(path, inputFilter); if (matches == null) { result.addError(new IOException("Input path does not exist: " + path)); @@ -344,7 +367,7 @@ void addError(IOException ioe) { /** * The callback handler to handle results generated by - * {@link ProcessInitialInputPathCallable} + * {@link ProcessInitialInputPathCallable}. * */ private class ProcessInitialInputPathCallback implements diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index a59ffa9c6e088..bbb9faa000a08 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -31,6 +31,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.util.DurationInfo; /** * Class to provide lambda expression invocation of AWS operations. 
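The hunk that follows wraps the once() operation in a DurationInfo, so every translated AWS call now logs its elapsed time at debug level. As a rough, self-contained sketch of that try-with-resources timing pattern (the TimedCall class, the action string and the sleep are illustrative only, not part of the patch):

import java.util.concurrent.Callable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.util.DurationInfo;

public final class TimedCall {

  private static final Logger LOG = LoggerFactory.getLogger(TimedCall.class);

  /**
   * Invoke a callable, logging its duration.
   * The boolean argument of DurationInfo selects the log level:
   * false logs at debug, true at info.
   */
  static <T> T timed(String action, Callable<T> operation) throws Exception {
    // DurationInfo starts the clock on construction; its close() logs the
    // elapsed time, and runs even when the operation throws.
    try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) {
      return operation.call();
    }
  }

  public static void main(String[] args) throws Exception {
    timed("sample operation", () -> {
      Thread.sleep(250);
      return null;
    });
  }
}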
@@ -105,7 +106,7 @@ public Retried getRetryCallback() { @Retries.OnceTranslated public static T once(String action, String path, Operation operation) throws IOException { - try { + try (DurationInfo ignored = new DurationInfo(LOG, false, "%s", action)) { return operation.execute(); } catch (AmazonClientException e) { throw S3AUtils.translateException(action, path, e); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 472eac2a57ab4..0d62329cdb51e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3674,14 +3674,22 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException { } /** - * Override superclass so as to add statistic collection. + * Override superclass so as to add statistic collection + * and disable symlink resolution. * {@inheritDoc} */ @Override - public FileStatus[] globStatus(Path pathPattern, PathFilter filter) + public FileStatus[] globStatus( + final Path pathPattern, + final PathFilter filter) throws IOException { entryPoint(INVOCATION_GLOB_STATUS); - return new Globber(this, pathPattern, filter, false).glob(); + return Globber.createGlobber(this) + .withPathPattern(pathPattern) + .withPathFilter(filter) + .withResolveSymlinks(false) + .build() + .glob(); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index f9823cc13b7f9..1598c4ae48d3f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -327,14 +327,14 @@ public void testNoReadAccess() throws Throwable { // auth mode.
describe("LocatedFileStatusFetcher with %s", subdirFile); roleConfig.setInt(LIST_STATUS_NUM_THREADS, 16); - LocatedFileStatusFetcher fetcher3 = - new LocatedFileStatusFetcher( - roleConfig, - new Path[]{subdirFile}, - true, - TEXT_FILE, - true); try { + LocatedFileStatusFetcher fetcher3 = + new LocatedFileStatusFetcher( + roleConfig, + new Path[]{subdirFile}, + true, + TEXT_FILE, + true); Iterable fetched = fetcher3.getFileStatuses(); failif(!authMode, "LocatedFileStatusFetcher(" + subdirFile + ")" + " should have failed"); From 6d18b491defeb750fbd4a1b4e2856391c5279b64 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 31 Jul 2019 13:57:40 +0100 Subject: [PATCH 07/15] HADOOP-16458 add test for no-read-access delete to round the sequence off Change-Id: Ibe7f502a92c67040dbdba464e13751fdbe0db4f3 --- .../s3a/auth/ITestRestrictedReadAccess.java | 295 ++++++++++++++---- 1 file changed, 229 insertions(+), 66 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index 1598c4ae48d3f..f22514d22731b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -18,11 +18,13 @@ package org.apache.hadoop.fs.s3a.auth; +import java.io.FileNotFoundException; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.AccessDeniedException; import java.util.Arrays; import java.util.Collection; +import java.util.concurrent.Callable; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -68,6 +70,7 @@ import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.bindRolePolicyStatements; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.newAssumedRoleConfig; import static org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS; +import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; import static org.apache.hadoop.test.GenericTestUtils.failif; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -81,30 +84,64 @@ *
 *   <li>And the s3guard tests use the local metastore if
 *   there was not one already.</li>
 * </ul>
- * The tests are all bundled into one big file to reduce setup costs
- * on a parameterized test run.
+ * The tests are all bundled into one big test case.
+ * This is, from a purist unit test perspective,
+ * utterly wrong as it goes against the "Each test case tests exactly one
+ * thing" philosophy of JUnit.
+ * However it significantly reduces setup costs on the parameterized test runs,
+ * as it means that the filesystems and directories only need to be
+ * created and destroyed once per parameterized suite, rather than
+ * once per individual test.
+ * All the test probes have informative messages so when a test failure
+ * does occur, its cause should be discoverable. Its main weaknesses are
+ * therefore:
+ * <ol>
+ *   <li>A failure of an assertion blocks all subsequent assertions from
+ *   being checked.</li>
+ *   <li>Maintenance is going to be harder.</li>
+ * </ol>
    */ +@SuppressWarnings("ThrowableNotThrown") @RunWith(Parameterized.class) public class ITestRestrictedReadAccess extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(ITestRestrictedReadAccess.class); - private static final PathFilter ANYTHING = t -> true; + /** Filter to select everything. */ + private static final PathFilter EVERYTHING = t -> true; + /** Filter to select .txt files. */ private static final PathFilter TEXT_FILE = path -> path.toUri().toString().endsWith(".txt"); + /** The same path filter used in FileInputFormat. */ + private static final PathFilter HIDDEN_FILE_FILTER = + (p) -> { + String n = p.getName(); + return !n.startsWith("_") && !n.startsWith("."); + }; + /** - * Text found in LocatedFileStatusFetcher exception. + * Text found in LocatedFileStatusFetcher exception when the glob + * returned "null". */ private static final String DOES_NOT_EXIST = "does not exist"; /** - * Text found in LocatedFileStatusFetcher exception. + * Text found in LocatedFileStatusFetcher exception when + * the glob returned an empty list. */ private static final String MATCHES_0_FILES = "matches 0 files"; + /** + * Text used in files. + */ + public static final byte[] HELLO = "hello".getBytes(Charset.forName("UTF-8")); + + /** + * Parameterization. + */ @Parameterized.Parameters(name = "{0}") public static Collection params() { return Arrays.asList(new Object[][]{ @@ -121,10 +158,16 @@ public static Collection params() { private final boolean authMode; /** - * A role FS; if non-null it is closed in teardown. + * A read-only FS; if non-null it is closed in teardown. */ - private S3AFileSystem roleFS; + private S3AFileSystem readonlyFS; + /** + * Test suite setup. + * @param name name for logs/paths. + * @param s3guard is S3Guard enabled? + * @param authMode is S3Guard in auth mode? + */ public ITestRestrictedReadAccess( final String name, final boolean s3guard, @@ -159,7 +202,7 @@ public void setup() throws Exception { @Override public void teardown() throws Exception { - S3AUtils.closeAll(LOG, roleFS); + S3AUtils.closeAll(LOG, readonlyFS); super.teardown(); } @@ -188,106 +231,164 @@ private Configuration createAssumedRoleConfig(String roleARN) { return newAssumedRoleConfig(getContract().getConf(), roleARN); } - @Test public void testNoReadAccess() throws Throwable { - describe("Test failure handling of if the client doesn't" + describe("Test failure handling if the client doesn't" + " have read access under a path"); S3AFileSystem realFS = getFileSystem(); // avoiding the parameterization to steer clear of accidentally creating // patterns Path basePath = path("testNoReadAccess-" + name); + + // define the paths and create them. + describe("Creating test directories and files"); + + // this is the directory to which the restricted role has no read + // access. 
Path noReadDir = new Path(basePath, "noReadDir"); + + // an empty directory under the noReadDir Path emptyDir = new Path(noReadDir, "emptyDir"); realFS.mkdirs(emptyDir); + + // an empty file under the noReadDir Path emptyFile = new Path(noReadDir, "emptyFile.txt"); touch(realFS, emptyFile); + + // a subdirectory Path subDir = new Path(noReadDir, "subDir"); + // and a file in that subdirectory Path subdirFile = new Path(subDir, "subdirFile.txt"); - byte[] data = "hello".getBytes(Charset.forName("UTF-8")); - createFile(realFS, subdirFile, true, data); + createFile(realFS, subdirFile, true, HELLO); Path subDir2 = new Path(noReadDir, "subDir2"); Path subdir2File1 = new Path(subDir2, "subdir2File1.txt"); Path subdir2File2 = new Path(subDir2, "subdir2File2.docx"); - createFile(realFS, subdir2File1, true, data); - createFile(realFS, subdir2File2, true, data); + createFile(realFS, subdir2File1, true, HELLO); + createFile(realFS, subdir2File2, true, HELLO); + // create a role filesystem which does not have read access under a path + // it still has write access, which can be explored in the final + // step to delete files and directories. Configuration roleConfig = createAssumedRoleConfig(); bindRolePolicyStatements(roleConfig, STATEMENT_S3GUARD_CLIENT, STATEMENT_ALLOW_SSE_KMS_RW, statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), new Statement(Effects.Deny) - .addActions(S3_PATH_RW_OPERATIONS) + .addActions(S3_ALL_GET) .addResources(directory(noReadDir))); - roleFS = (S3AFileSystem) basePath.getFileSystem(roleConfig); + readonlyFS = (S3AFileSystem) basePath.getFileSystem(roleConfig); + + // now move up the API chain, from the calls made by globStatus, + // to globStatus itself, and then to LocatedFileStatusFetcher, + // which invokes globStatus + + // ----------------------------------------------------------------- + // validate basic IO operations. + // ----------------------------------------------------------------- // this is a LIST call; there's no marker. - roleFS.listStatus(basePath); + // so the sequence is + // - HEAD path -> FNFE + // - HEAD path + / -> FNFE + // - LIST path -> list results + // Because the client has list access, this succeeds + readonlyFS.listStatus(basePath); // this is HEAD + "/" on S3; get on S3Guard auth - roleFS.listStatus(emptyDir); + readonlyFS.listStatus(emptyDir); + + // a recursive list of the no-read-directory works because + // there is no directory marker; it becomes a LIST call. + lsR(readonlyFS, noReadDir, true); - lsR(roleFS, noReadDir, true); + // similarly, a getFileStatus ends up being a list and generating + // a file status marker. + readonlyFS.getFileStatus(noReadDir); - roleFS.getFileStatus(noReadDir); - roleFS.getFileStatus(emptyDir); + // empty dir checks work! + readonlyFS.getFileStatus(emptyDir); + + // now look at a file; the outcome depends on the mode. if (authMode) { // auth mode doesn't check S3, so no failure - roleFS.getFileStatus(subdirFile); + readonlyFS.getFileStatus(subdirFile); } else { - intercept(AccessDeniedException.class, () -> - roleFS.getFileStatus(subdirFile)); + accessDenied(() -> + readonlyFS.getFileStatus(subdirFile)); } - intercept(AccessDeniedException.class, () -> - ContractTestUtils.readUTF8(roleFS, subdirFile, data.length)); - if (authMode) { + + // irrespective of mode, the attempt to read the data will fail.
+ // the only variable is where the failure occurs + accessDenied(() -> + ContractTestUtils.readUTF8(readonlyFS, subdirFile, HELLO.length)); + + // the empty file is interesting + if (!authMode) { + // non-auth mode, it fails at some point in the opening process. + // due to a HEAD being called on the object + accessDenied(() -> + ContractTestUtils.readUTF8(readonlyFS, emptyFile, 0)); + } else { // auth mode doesn't just not check the store, // because it knows the file length is zero, // it returns -1 without even opening the file. - try (FSDataInputStream is = roleFS.open(emptyFile)) { + // see: HADOOP-16464 + try (FSDataInputStream is = readonlyFS.open(emptyFile)) { Assertions.assertThat(is.read()) .describedAs("read of empty file") .isEqualTo(-1); } - roleFS.getFileStatus(subdirFile); - } else { - // non-auth mode, it fails at some point in the opening process. - intercept(AccessDeniedException.class, () -> - ContractTestUtils.readUTF8(roleFS, emptyFile, 0)); + readonlyFS.getFileStatus(subdirFile); } - // Fun with Glob + // ----------------------------------------------------------------- + // Explore Glob's recursive scan + // ----------------------------------------------------------------- + describe("Glob Status operations"); // baseline: the real filesystem on a subdir globFS(realFS, subdirFile, null, false, 1); // a file fails if not in auth mode - globFS(roleFS, subdirFile, null, !authMode, 1); + globFS(readonlyFS, subdirFile, null, !authMode, 1); // empty directories don't fail. assertStatusPathEquals(emptyDir, - globFS(roleFS, emptyDir, null, false, 1)); + globFS(readonlyFS, emptyDir, null, false, 1)); + // wildcard scan to find *.txt Path textFilePathPattern = new Path(noReadDir, "*/*.txt"); - FileStatus[] st = globFS(roleFS, + FileStatus[] st = globFS(readonlyFS, textFilePathPattern, null, false, 2); Assertions.assertThat(st) - .extracting("getPath") + .extracting(FileStatus::getPath) .containsExactlyInAnyOrder(subdirFile, subdir2File1); - globFS(roleFS, + // there is precisely one .docx file (subdir2File2.docx) + globFS(readonlyFS, new Path(noReadDir, "*/*.docx"), null, false, 1); - globFS(roleFS, + + // there are no .doc files. + globFS(readonlyFS, new Path(noReadDir, "*/*.doc"), null, false, 0); - globFS(roleFS, noReadDir, - ANYTHING, false, 1); - - // now run a located file status fetcher against it + globFS(readonlyFS, noReadDir, + EVERYTHING, false, 1); + // and a filter without any wildcarded pattern only finds + // the role dir itself. + FileStatus[] st2 = globFS(readonlyFS, noReadDir, + EVERYTHING, false, 1); + Assertions.assertThat(st2) + .extracting(FileStatus::getPath) + .containsExactly(noReadDir); + + // ----------------------------------------------------------------- + // now run a located file status fetcher against the directory tree. + // ----------------------------------------------------------------- describe("LocatedFileStatusFetcher operations"); // use the same filter as FileInputFormat; single thread. @@ -308,6 +409,7 @@ public void testNoReadAccess() throws Throwable { subdir2File1, subdir2File2); + // four threads and the text filter. 
describe("LocatedFileStatusFetcher with %s", textFilePathPattern); roleConfig.setInt(LIST_STATUS_NUM_THREADS, 4); LocatedFileStatusFetcher fetcher2 = @@ -315,7 +417,7 @@ public void testNoReadAccess() throws Throwable { roleConfig, new Path[]{textFilePathPattern}, true, - ANYTHING, + EVERYTHING, true); Assertions.assertThat(fetcher2.getFileStatuses()) .describedAs("result of located scan") @@ -323,43 +425,47 @@ public void testNoReadAccess() throws Throwable { .flatExtracting(FileStatus::getPath) .containsExactlyInAnyOrder(subdirFile, subdir2File1); - // query a file and expect the initial scan to fail except in - // auth mode. - describe("LocatedFileStatusFetcher with %s", subdirFile); + // pass in a file as the base of the scan. + describe("LocatedFileStatusFetcher with file %s", subdirFile); roleConfig.setInt(LIST_STATUS_NUM_THREADS, 16); try { - LocatedFileStatusFetcher fetcher3 = - new LocatedFileStatusFetcher( - roleConfig, - new Path[]{subdirFile}, - true, - TEXT_FILE, - true); - Iterable fetched = fetcher3.getFileStatuses(); + Iterable fetched = new LocatedFileStatusFetcher( + roleConfig, + new Path[]{subdirFile}, + true, + TEXT_FILE, + true).getFileStatuses(); + // when not in auth mode, the HEAD request MUST have failed. failif(!authMode, "LocatedFileStatusFetcher(" + subdirFile + ")" + " should have failed"); + // and in auth mode, the file MUST have been found. Assertions.assertThat(fetched) .describedAs("result of located scan") .isNotNull() .flatExtracting(FileStatus::getPath) .containsExactly(subdirFile); } catch (AccessDeniedException e) { + // we require the HEAD request to fail with access denied in non-auth + // mode, but not in auth mode. failif(authMode, "LocatedFileStatusFetcher(" + subdirFile + ")", e); } - // a file that doesn't exist + // scan a path that doesn't exist Path nonexistent = new Path(noReadDir, "nonexistent"); - intercept(InvalidInputException.class, + InvalidInputException ex = intercept(InvalidInputException.class, DOES_NOT_EXIST, () -> new LocatedFileStatusFetcher( roleConfig, new Path[]{nonexistent}, true, - ANYTHING, + EVERYTHING, true) .getFileStatuses()); + // validate nested exception + assertExceptionContains(DOES_NOT_EXIST, ex.getCause()); // a file which exists but which doesn't match the pattern + // is downgraded to not existing. intercept(InvalidInputException.class, DOES_NOT_EXIST, () -> new LocatedFileStatusFetcher( @@ -370,8 +476,8 @@ public void testNoReadAccess() throws Throwable { true) .getFileStatuses()); - // a pattern under a nonexistent path. - intercept( + // a pattern under a nonexistent path is considered to not be a match. 
+ ex = intercept( InvalidInputException.class, MATCHES_0_FILES, () -> new LocatedFileStatusFetcher( @@ -381,13 +487,75 @@ public void testNoReadAccess() throws Throwable { TEXT_FILE, true) .getFileStatuses()); + // validate nested exception + assertExceptionContains(MATCHES_0_FILES, ex.getCause()); + + // ----------------------------------------------------------------- + // finally, do some cleanup to see what happens with a delete + // ----------------------------------------------------------------- + + if (!authMode) { + // unguarded or non-auth S3Guard to fail on HEAD + / + accessDenied(() -> readonlyFS.delete(emptyDir, true)); + // to fail on HEAD + accessDenied(() -> readonlyFS.delete(emptyFile, true)); + } else { + // auth mode checks DDB for status and then issues the DELETE + readonlyFS.delete(emptyDir, true); + readonlyFS.delete(emptyFile, true); + } + + // this will succeed for both as there is no subdir marker. + readonlyFS.delete(subDir, true); + // after which it is not there + fileNotFound(() -> readonlyFS.getFileStatus(subDir)); + // and nor is its child. + fileNotFound(() -> readonlyFS.getFileStatus(subdirFile)); + + // now delete the base path + readonlyFS.delete(basePath, true); + // and expect an FNFE + fileNotFound(() -> readonlyFS.getFileStatus(subDir)); + } + + /** + * Require an operation to fail with a FileNotFoundException. + * @param eval closure to evaluate. + * @param type of callable + * @return the exception. + * @throws Exception any other exception + */ + protected FileNotFoundException fileNotFound(final Callable eval) + throws Exception { + return intercept(FileNotFoundException.class, eval); } + /** + * Require an operation to fail with an AccessDeniedException. + * @param eval closure to evaluate. + * @param type of callable + * @return the exception. + * @throws Exception any other exception + */ + protected AccessDeniedException accessDenied(final Callable eval) + throws Exception { + return intercept(AccessDeniedException.class, eval); + } + + /** + * Assert that a status array has exactly one element and its + * value is as expected. 
+ * @param expected expected path + * @param statuses list of statuses + */ protected void assertStatusPathEquals(final Path expected, final FileStatus[] statuses) { Assertions.assertThat(statuses) + .describedAs("List of status entries") + .isNotNull() .hasSize(1); Assertions.assertThat(statuses[0].getPath()) + .describedAs("Status entry %s", statuses[0]) .isEqualTo(expected); } @@ -450,10 +618,5 @@ protected FileStatus[] globFS( return st; } - private static final PathFilter HIDDEN_FILE_FILTER = - (p) -> { - String n = p.getName(); - return !n.startsWith("_") && !n.startsWith("."); - }; } From 526e964671614c4d297c2906bacfda8943e232d9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 31 Jul 2019 15:05:00 +0100 Subject: [PATCH 08/15] HADOOP-16458: remove unused ITest; break up ITestRestrictedReadAccess method into pieces Change-Id: Ic65c49bd512f4b8f33c211b9e4924b203165619f --- .../fs/s3a/ITestLocatedFileStatusFetcher.java | 40 ------ .../s3a/auth/ITestRestrictedReadAccess.java | 117 +++++++++++++----- 2 files changed, 86 insertions(+), 71 deletions(-) delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java deleted file mode 100644 index bd6bf2f6cdbc3..0000000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestLocatedFileStatusFetcher.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Test the LocatedFileStatusFetcher can do. - * This is related to HADOOP-16458. - * There's basic tests in ITestS3AFSMainOperations; this - * is see if we can create better corner cases. - */ -public class ITestLocatedFileStatusFetcher extends AbstractS3ATestBase { - - private static final Logger LOG = - LoggerFactory.getLogger(ITestLocatedFileStatusFetcher.class); - - @Test - public void testGlobScan() throws Throwable { - - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index f22514d22731b..081a2e1b796ae 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -98,8 +98,11 @@ *
 * <ol>
 *   <li>A failure of an assertion blocks all subsequent assertions from
 *   being checked.</li>
- *   <li>Maintenance is going to be harder.</li>
+ *   <li>Maintenance is potentially harder.</li>
 * </ol>
+ * To simplify maintenance, the operations tested are broken up into + * their own methods, with fields used to share the restricted role and + * created paths. */ @SuppressWarnings("ThrowableNotThrown") @RunWith(Parameterized.class) public class ITestRestrictedReadAccess extends AbstractS3ATestBase { @@ -157,6 +160,26 @@ public static Collection params() { private final boolean authMode; + private Path basePath; + + private Path noReadDir; + + private Path emptyDir; + + private Path emptyFile; + + private Path subDir; + + private Path subdirFile; + + private Path subDir2; + + private Path subdir2File1; + + private Path subdir2File2; + + private Configuration roleConfig; + /** * A read-only FS; if non-null it is closed in teardown. */ @@ -231,48 +254,69 @@ private Configuration createAssumedRoleConfig(String roleARN) { return newAssumedRoleConfig(getContract().getConf(), roleARN); } + /** + * This is a single test case which invokes the individual test cases + * in sequence. + */ @Test public void testNoReadAccess() throws Throwable { describe("Test failure handling if the client doesn't" + " have read access under a path"); + initNoReadAccess(); + + // now move up the API chain, from the calls made by globStatus, + // to globStatus itself, and then to LocatedFileStatusFetcher, + // which invokes globStatus + + testBasicFileOperations(); + testGlobOperations(); + testLocatedFileStatusFetcher(); + testDeleteOperations(); + } + + /** + * Initialize the directory tree and the role filesystem. + */ + public void initNoReadAccess() throws Throwable { + describe("Setting up filesystem"); + S3AFileSystem realFS = getFileSystem(); // avoiding the parameterization to steer clear of accidentally creating // patterns - Path basePath = path("testNoReadAccess-" + name); + basePath = path("testNoReadAccess-" + name); // define the paths and create them. describe("Creating test directories and files"); // this is the directory to which the restricted role has no read // access. - Path noReadDir = new Path(basePath, "noReadDir"); + noReadDir = new Path(basePath, "noReadDir"); // an empty directory under the noReadDir - Path emptyDir = new Path(noReadDir, "emptyDir"); + emptyDir = new Path(noReadDir, "emptyDir"); realFS.mkdirs(emptyDir); // an empty file under the noReadDir - Path emptyFile = new Path(noReadDir, "emptyFile.txt"); + emptyFile = new Path(noReadDir, "emptyFile.txt"); touch(realFS, emptyFile); // a subdirectory - Path subDir = new Path(noReadDir, "subDir"); + subDir = new Path(noReadDir, "subDir"); // and a file in that subdirectory - Path subdirFile = new Path(subDir, "subdirFile.txt"); + subdirFile = new Path(subDir, "subdirFile.txt"); createFile(realFS, subdirFile, true, HELLO); - Path subDir2 = new Path(noReadDir, "subDir2"); - Path subdir2File1 = new Path(subDir2, "subdir2File1.txt"); - Path subdir2File2 = new Path(subDir2, "subdir2File2.docx"); + subDir2 = new Path(noReadDir, "subDir2"); + subdir2File1 = new Path(subDir2, "subdir2File1.txt"); + subdir2File2 = new Path(subDir2, "subdir2File2.docx"); createFile(realFS, subdir2File1, true, HELLO); createFile(realFS, subdir2File2, true, HELLO); // create a role filesystem which does not have read access under a path // it still has write access, which can be explored in the final // step to delete files and directories.
- Configuration roleConfig = createAssumedRoleConfig(); + roleConfig = createAssumedRoleConfig(); bindRolePolicyStatements(roleConfig, STATEMENT_S3GUARD_CLIENT, STATEMENT_ALLOW_SSE_KMS_RW, @@ -281,14 +325,12 @@ public void testNoReadAccess() throws Throwable { .addActions(S3_ALL_GET) .addResources(directory(noReadDir))); readonlyFS = (S3AFileSystem) basePath.getFileSystem(roleConfig); + } - // now move up the API chain, from the calls made by globStatus, - // to globStatus itself, and then to LocatedFileStatusFetcher, - // which invokes globStatus - - // ----------------------------------------------------------------- - // validate basic IO operations. - // ----------------------------------------------------------------- + /** + * Validate basic IO operations. + */ + public void testBasicFileOperations() throws Throwable { // this is a LIST call; there's no marker. // so the sequence is @@ -344,14 +386,16 @@ public void testNoReadAccess() throws Throwable { } readonlyFS.getFileStatus(subdirFile); } + } - // ----------------------------------------------------------------- - // Explore Glob's recursive scan - // ----------------------------------------------------------------- + /** + * Explore Glob's recursive scan. + */ + public void testGlobOperations() throws Throwable { describe("Glob Status operations"); // baseline: the real filesystem on a subdir - globFS(realFS, subdirFile, null, false, 1); + globFS(getFileSystem(), subdirFile, null, false, 1); // a file fails if not in auth mode globFS(readonlyFS, subdirFile, null, !authMode, 1); // empty directories don't fail. assertStatusPathEquals(emptyDir, globFS(readonlyFS, emptyDir, null, false, 1)); @@ -386,11 +430,16 @@ public void testNoReadAccess() throws Throwable { .extracting(FileStatus::getPath) .containsExactly(noReadDir); - // ----------------------------------------------------------------- - // now run a located file status fetcher against the directory tree. - // ----------------------------------------------------------------- - describe("LocatedFileStatusFetcher operations"); + } + + /** + * Run a located file status fetcher against the directory tree. + */ + public void testLocatedFileStatusFetcher() throws Throwable { + describe("LocatedFileStatusFetcher operations"); + // wildcard scan to find *.txt + Path textFilePathPattern = new Path(noReadDir, "*/*.txt"); // use the same filter as FileInputFormat; single thread. roleConfig.setInt(LIST_STATUS_NUM_THREADS, 1); LocatedFileStatusFetcher fetcher = @@ -489,10 +538,16 @@ public void testNoReadAccess() throws Throwable { .getFileStatuses()); // validate nested exception assertExceptionContains(MATCHES_0_FILES, ex.getCause()); + } - // ----------------------------------------------------------------- - // finally, do some cleanup to see what happens with a delete - // ----------------------------------------------------------------- + /** + * Do some cleanup to see what happens with delete calls. + * Cleanup happens in test teardown anyway; doing it here + * just makes use of the delete calls to see how delete failures + * change with permissions and S3Guard settings. + */ + public void testDeleteOperations() throws Throwable { + describe("Testing delete operations"); if (!authMode) { @@ -560,7 +615,7 @@ } /** - * Glob. + * Glob under a path with expected outcomes.
* @param fs filesystem to use * @param path path (which can include patterns) * @param filter optional filter From 191ac65db07bb16abfe47a716e00d30ae90e92bc Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 28 Aug 2019 15:38:37 +0100 Subject: [PATCH 09/15] HADOOP-16458 final feedback from gabor Change-Id: I6937aa4c7de8adc135d9eb6bf99cca1b69773014 --- .../apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index 081a2e1b796ae..1a3c6fb3e1c3f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -77,7 +77,7 @@ /** * This test creates a client with no read access to the underlying * filesystem and then tries to perform various read operations on it. - * S3Guard in auth mode always goes to the FS, so we parameterize the + * S3Guard in non-auth mode always goes to the FS, so we parameterize the * test for S3Guard + Auth to see how failures move around. *
 * <ul>
 *   <li>Tests only run if an assumed role is provided.</li>
From eab5a8ab217c73e722b7c43847be74058d555300 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 28 Aug 2019 15:52:43 +0100 Subject: [PATCH 10/15] HADOOP-16458 and address an existing checkstyle issue on the LocatedFileStatusFetcher Change-Id: I592cff79fce8680daee55bae983465919734a8f1 --- .../org/apache/hadoop/mapred/LocatedFileStatusFetcher.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java index 471f208a14126..a248f1401cb8d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LocatedFileStatusFetcher.java @@ -99,8 +99,8 @@ public class LocatedFileStatusFetcher { * @throws IOException */ public LocatedFileStatusFetcher(Configuration conf, Path[] dirs, - boolean recursive, PathFilter inputFilter, boolean newApi) throws InterruptedException, - IOException { + boolean recursive, PathFilter inputFilter, boolean newApi) + throws InterruptedException, IOException { int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS, FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS); LOG.debug("Instantiated LocatedFileStatusFetcher with {} threads", From 779c001f9eb1500f1276b978e191bef40d22bb50 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 3 Sep 2019 18:06:03 +0100 Subject: [PATCH 11/15] HADOOP-16458 gabor's comments about comments Change-Id: I792c6187fdaaf6c981ac635ec9b527a925ad3d14 --- .../hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index 1a3c6fb3e1c3f..8686618c08638 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -375,10 +375,11 @@ public void testBasicFileOperations() throws Throwable { accessDenied(() -> ContractTestUtils.readUTF8(readonlyFS, emptyFile, 0)); } else { - // auth mode doesn't just not check the store, - // because it knows the file length is zero, + // auth mode doesn't check the store. + // Furthermore, because it knows the file length is zero, // it returns -1 without even opening the file. - // see: HADOOP-16464 + // This means that permissions on the file do not get checked. + // See: HADOOP-16464. try (FSDataInputStream is = readonlyFS.open(emptyFile)) { From 8ac342e55dd7675b52734bd0a3c469750be1cf40 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 3 Sep 2019 18:10:01 +0100 Subject: [PATCH 12/15] HADOOP-16458.
Fix findbugs reported null deref Change-Id: I3c1d64749107e6f69be76cdcb4eab724cdb1f9ba --- .../src/main/java/org/apache/hadoop/fs/Globber.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 038c30c200a44..0b5553385e6ec 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -102,7 +102,7 @@ private Globber(FileContext fc, Path pathPattern, PathFilter filter, this.pathPattern = pathPattern; this.filter = filter; this.resolveSymlinks = resolveSymlinks; - this.tracer = FsTracer.get(fs.getConf()); + this.tracer = fc.getTracer(); LOG.debug("Created Globber path={}, symlinks={}", pathPattern, resolveSymlinks); } From 6c03c2aebed32a7eb3e8048bd91ec15b23c492e5 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 9 Sep 2019 19:35:42 +0100 Subject: [PATCH 13/15] HADOOP-16458 located status tuning -break up the ITestRestrictedReadAccess operations into more methods and change test* to check* as the prefix as the IDE was complaining about test- prefixed methods which weren't annotated. -javadoc and other reviews of the Globber changes Change-Id: Ib4f176926e7631d93829bab45e85857480521042 --- .../java/org/apache/hadoop/fs/Globber.java | 32 ++++++--- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 5 +- .../fs/s3a/ITestS3AFSMainOperations.java | 4 +- .../s3a/auth/ITestRestrictedReadAccess.java | 69 +++++++++++++------ 4 files changed, 75 insertions(+), 35 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 0b5553385e6ec..53fddb00b34f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -50,7 +50,7 @@ public class Globber { private final PathFilter filter; private final Tracer tracer; private final boolean resolveSymlinks; - + Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.fs = fs; this.fc = null; @@ -70,7 +70,7 @@ public class Globber { } /** - * Constructor for use by {@link GlobBuilder}. + * Filesystem constructor for use by {@link GlobBuilder}. * @param fs filesystem * @param pathPattern path pattern * @param filter optional filter @@ -89,7 +89,7 @@ private Globber(FileSystem fs, Path pathPattern, PathFilter filter, } /** - * Constructor for use by {@link GlobBuilder}. + * File Context constructor for use by {@link GlobBuilder}. 
* @param fc file context * @param pathPattern path pattern * @param filter optional filter @@ -195,7 +195,7 @@ private String authorityFromPath(Path path) throws IOException { public FileStatus[] glob() throws IOException { TraceScope scope = tracer.newScope("Globber#glob"); scope.addKVAnnotation("pattern", pathPattern.toUri().getPath()); - try(DurationInfo ignored = new DurationInfo(LOG, false, + try (DurationInfo ignored = new DurationInfo(LOG, false, "glob %s", pathPattern)) { return doGlob(); } finally { @@ -324,7 +324,6 @@ private FileStatus[] doGlob() throws IOException { // the listing status is of a file continue; } - } } for (FileStatus child : children) { @@ -394,17 +393,29 @@ private FileStatus[] doGlob() throws IOException { return ret; } - public static GlobBuilder createGlobber(FileSystem fs) { - return new GlobBuilder(fs); + /** + * Create a builder for a Globber, bonded to the specific filesystem. + * @param filesystem filesystem + * @return the builder to finish configuring. + */ + public static GlobBuilder createGlobber(FileSystem filesystem) { + return new GlobBuilder(filesystem); } - public static GlobBuilder createGlobber(FileContext fc) { - return new GlobBuilder(fc); + /** + * Create a builder for a Globber, bonded to the specific file + * context. + * @param fileContext file context. + * @return the builder to finish configuring. + */ + public static GlobBuilder createGlobber(FileContext fileContext) { + return new GlobBuilder(fileContext); } /** - * Builder for glob instances. + * Builder for Globber instances. */ + @InterfaceAudience.Private public static class GlobBuilder { private final FileSystem fs; @@ -474,6 +485,5 @@ public Globber build() { ? new Globber(fs, pathPattern, filter, resolveSymlinks) : new Globber(fc, pathPattern, filter, resolveSymlinks); } - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 0d62329cdb51e..1a1d9b75e4f16 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3674,8 +3674,9 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException { } /** - * Override superclass so as to add statistic collection - * and disable symlink resolution. + * Override superclass so as to disable symlink resolution and so avoid + * some calls to the FS which may have problems when the store is being + * inconsistent. 
* {@inheritDoc} */ @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java index 6552150ac04d3..511aa0fc80dbc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java @@ -58,8 +58,8 @@ public void testGlobStatusThrowsExceptionForUnreadableDir() { @Override @Ignore("local FS path setup broken") - public void testCopyToLocalWithUseRawLocalFileSystemOption() throws - Exception { + public void testCopyToLocalWithUseRawLocalFileSystemOption() + throws Exception { } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index 8686618c08638..a741cd6f9d5fc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -85,13 +85,16 @@ * there was not one already. *
 * The tests are all bundled into one big test case.
- * This is, from a purist unit test perspective,
- * utterly wrong as it goes against the "Each test case tests exactly one
- * thing" philosophy of JUnit.
+ * From a purist unit test perspective, this is utterly wrong as it goes
+ * against the
+ * "Each test case tests exactly one thing"
+ * philosophy of JUnit.
+ * <p>
 * However it significantly reduces setup costs on the parameterized test runs,
 * as it means that the filesystems and directories only need to be
 * created and destroyed once per parameterized suite, rather than
 * once per individual test.
+ * <p>
 * All the test probes have informative messages so when a test failure * does occur, its cause should be discoverable. Its main weaknesses are * therefore: @@ -142,6 +145,14 @@ public class ITestRestrictedReadAccess extends AbstractS3ATestBase { */ public static final byte[] HELLO = "hello".getBytes(Charset.forName("UTF-8")); + /** + * Wildcard scan to find *.txt in the no-read directory. + * When a scan/glob is done with S3Guard in auth mode, the scan will + * succeed but the file open will fail for any non-empty file. + * In non-auth mode, the read restrictions will fail the actual scan. + */ + private Path noReadWildcard; + /** * Parameterization. */ @@ -268,10 +279,13 @@ public void testNoReadAccess() throws Throwable { // to globStatus itself, and then to LocatedFileStatusFetcher, // which invokes globStatus - testBasicFileOperations(); - testGlobOperations(); - testLocatedFileStatusFetcher(); - testDeleteOperations(); + checkBasicFileOperations(); + checkGlobOperations(); + checkSingleThreadedLocatedFileStatus(); + checkLocatedFileStatusFourThreads(); + checkLocatedFileStatusScanFile(); + checkLocatedFileStatusNonexistentPath(); + checkDeleteOperations(); } /** @@ -292,6 +306,8 @@ public void initNoReadAccess() throws Throwable { // this is the directory to which the restricted role has no read // access. noReadDir = new Path(basePath, "noReadDir"); + // wildcard scan to find *.txt + noReadWildcard = new Path(noReadDir, "*/*.txt"); // an empty directory under the noReadDir emptyDir = new Path(noReadDir, "emptyDir"); @@ -330,7 +346,7 @@ public void initNoReadAccess() throws Throwable { /** * Validate basic IO operations. */ - public void testBasicFileOperations() throws Throwable { + public void checkBasicFileOperations() throws Throwable { // this is a LIST call; there's no marker. @@ -392,7 +408,7 @@ public void testBasicFileOperations() throws Throwable { /** * Explore Glob's recursive scan. */ - public void testGlobOperations() throws Throwable { + public void checkGlobOperations() throws Throwable { describe("Glob Status operations"); // baseline: the real filesystem on a subdir @@ -403,10 +419,8 @@ public void testGlobOperations() throws Throwable { assertStatusPathEquals(emptyDir, globFS(readonlyFS, emptyDir, null, false, 1)); - // wildcard scan to find *.txt - Path textFilePathPattern = new Path(noReadDir, "*/*.txt"); FileStatus[] st = globFS(readonlyFS, - textFilePathPattern, + noReadWildcard, null, false, 2); Assertions.assertThat(st) .extracting(FileStatus::getPath) @@ -430,17 +444,14 @@ public void testGlobOperations() throws Throwable { Assertions.assertThat(st2) .extracting(FileStatus::getPath) .containsExactly(noReadDir); - } /** * Run a located file status fetcher against the directory tree. */ - public void testLocatedFileStatusFetcher() throws Throwable { + public void checkSingleThreadedLocatedFileStatus() throws Throwable { describe("LocatedFileStatusFetcher operations"); - // wildcard scan to find *.txt - Path textFilePathPattern = new Path(noReadDir, "*/*.txt"); // use the same filter as FileInputFormat; single thread. roleConfig.setInt(LIST_STATUS_NUM_THREADS, 1); LocatedFileStatusFetcher fetcher = @@ -459,13 +470,21 @@ public void testLocatedFileStatusFetcher() throws Throwable { subdir2File1, subdir2File2); + } + + /** + * Run a located file status fetcher against the directory tree. + */ + public void checkLocatedFileStatusFourThreads() throws Throwable { + // four threads and the text filter.
- describe("LocatedFileStatusFetcher with %s", textFilePathPattern); - roleConfig.setInt(LIST_STATUS_NUM_THREADS, 4); + int threads = 4; + describe("LocatedFileStatusFetcher with %d", threads); + roleConfig.setInt(LIST_STATUS_NUM_THREADS, threads); LocatedFileStatusFetcher fetcher2 = new LocatedFileStatusFetcher( roleConfig, - new Path[]{textFilePathPattern}, + new Path[]{noReadWildcard}, true, EVERYTHING, true); @@ -474,7 +493,12 @@ public void testLocatedFileStatusFetcher() throws Throwable { .isNotNull() .flatExtracting(FileStatus::getPath) .containsExactlyInAnyOrder(subdirFile, subdir2File1); + } + /** + * Run a located file status fetcher against the directory tree. + */ + public void checkLocatedFileStatusScanFile() throws Throwable { // pass in a file as the base of the scan. describe("LocatedFileStatusFetcher with file %s", subdirFile); roleConfig.setInt(LIST_STATUS_NUM_THREADS, 16); @@ -499,7 +523,12 @@ public void testLocatedFileStatusFetcher() throws Throwable { // mode, but not in auth mode. failif(authMode, "LocatedFileStatusFetcher(" + subdirFile + ")", e); } + } + /** + * Explore what happens with a path that does not exist. + */ + public void checkLocatedFileStatusNonexistentPath() throws Throwable { // scan a path that doesn't exist Path nonexistent = new Path(noReadDir, "nonexistent"); InvalidInputException ex = intercept(InvalidInputException.class, @@ -547,7 +576,7 @@ public void testLocatedFileStatusFetcher() throws Throwable { * just makes use of the delete calls to see how delete failures * change with permissions and S3Guard stettings. */ - public void testDeleteOperations() throws Throwable { + public void checkDeleteOperations() throws Throwable { describe("Testing delete operations"); if (!authMode) { From c43f683fc5352fcb06c26482ad6fe159bcb4ec04 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 12 Sep 2019 13:23:54 +0100 Subject: [PATCH 14/15] HADOOP-16458 address my own review comments Change-Id: I80e5054ae2a5b7c7b616586f9882f07fcf0b8f92 --- .../src/main/java/org/apache/hadoop/fs/Globber.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 53fddb00b34f5..1a1b9d4ec2c45 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -25,8 +25,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; - import org.apache.hadoop.util.DurationInfo; + import org.apache.htrace.core.TraceScope; import org.apache.htrace.core.Tracer; import org.slf4j.Logger; @@ -35,7 +35,7 @@ /** * Implementation of {@link FileSystem#globStatus(Path, PathFilter)}. * This has historically been package-private; it has been opened - * up for object stores within the {@code hadoop-*} codebase. + * up for object stores within the {@code hadoop-*} codebase ONLY. * It could be expanded for external store implementations in future. */ @InterfaceAudience.Private From b81aa7a95c9f5eaecf06443e055bb2e79ab6fa4e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 23 Sep 2019 15:00:43 +0100 Subject: [PATCH 15/15] HADOOP-16458 IDE warnings This is the first time anyone has edited the file for a few years; most of the warnings are unrelated to the patch. 
Change-Id: I429bc5f3c28d33f0321890487faa7ac2fd7a5ddd --- .../main/java/org/apache/hadoop/fs/Globber.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 1a1b9d4ec2c45..f301f22057925 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -32,6 +32,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static com.google.common.base.Preconditions.checkNotNull; + /** * Implementation of {@link FileSystem#globStatus(Path, PathFilter)}. * This has historically been package-private; it has been opened @@ -157,7 +159,7 @@ private static String unescapePathComponent(String name) { */ private static List getPathComponents(String path) throws IOException { - ArrayList ret = new ArrayList(); + ArrayList ret = new ArrayList<>(); for (String component : path.split(Path.SEPARATOR)) { if (!component.isEmpty()) { ret.add(component); @@ -219,7 +221,7 @@ private FileStatus[] doGlob() throws IOException { // Now loop over all flattened patterns. In every case, we'll be trying to // match them to entries in the filesystem. ArrayList results = - new ArrayList(flattenedPatterns.size()); + new ArrayList<>(flattenedPatterns.size()); boolean sawWildcard = false; for (String flatPattern : flattenedPatterns) { // Get the absolute path for this flattened pattern. We couldn't do @@ -234,7 +236,7 @@ private FileStatus[] doGlob() throws IOException { getPathComponents(absPattern.toUri().getPath()); // Starting out at the root of the filesystem, we try to match // filesystem entries against pattern components. - ArrayList candidates = new ArrayList(1); + ArrayList candidates = new ArrayList<>(1); // To get the "real" FileStatus of root, we'd have to do an expensive // RPC to the NameNode. So we create a placeholder FileStatus which has // the correct path, but defaults for the rest of the information. @@ -259,7 +261,7 @@ private FileStatus[] doGlob() throws IOException { for (int componentIdx = 0; componentIdx < components.size(); componentIdx++) { ArrayList newCandidates = - new ArrayList(candidates.size()); + new ArrayList<>(candidates.size()); GlobFilter globFilter = new GlobFilter(components.get(componentIdx)); String component = unescapePathComponent(components.get(componentIdx)); if (globFilter.hasPattern()) { @@ -434,7 +436,7 @@ public static class GlobBuilder { */ public GlobBuilder(final FileContext fc) { this.fs = null; - this.fc = fc; + this.fc = checkNotNull(fc); } /** @@ -442,7 +444,7 @@ public GlobBuilder(final FileContext fc) { * @param fs file system. */ public GlobBuilder(final FileSystem fs) { - this.fs = fs; + this.fs = checkNotNull(fs); this.fc = null; }
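With the full series applied, code inside the hadoop-* modules can drive a glob through the new builder instead of the package-private Globber constructors, exactly as the patched S3AFileSystem.globStatus() does. A minimal sketch of that call sequence, assuming only what the patches above define (the configuration, the /data pattern and the filter are illustrative; Globber is @InterfaceAudience.Private, so code outside hadoop-* should keep calling FileSystem.globStatus()):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Globber;
import org.apache.hadoop.fs.Path;

public final class GlobberSketch {

  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    // Bind a Globber to the filesystem, set the pattern and filter,
    // and disable symlink resolution, as the S3A override does:
    // an object store has no symlinks to chase.
    FileStatus[] matches = Globber.createGlobber(fs)
        .withPathPattern(new Path("/data/*/*.txt"))
        .withPathFilter(path -> path.getName().endsWith(".txt"))
        .withResolveSymlinks(false)
        .build()
        .glob();

    for (FileStatus status : matches) {
      System.out.println(status.getPath());
    }
  }
}

The failure semantics explored by ITestRestrictedReadAccess still apply: glob() returns the matched statuses, but whether a permission failure surfaces during the scan or only when a matched file is opened depends on the store and, for S3A, on the S3Guard auth mode.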