Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
66cac81
HADOOP-13230. Dir Marker patch: reapply everything from previous PR
steveloughran Jul 14, 2020
93be7b6
HADOOP-13230 dir marker -cost assertion classes and tests evolving
steveloughran Jul 15, 2020
f21b787
HADOOP-13230 operation cost tests
steveloughran Jul 16, 2020
4ec17cd
HADOOP-13230 factor out base class for cost tests and split suite by …
steveloughran Jul 16, 2020
c0320ef
HADOOP-13230 Marker Tool getting into a testable/tested state
steveloughran Jul 22, 2020
cf86899
HADOOP-13230 Marker Tool testing;
steveloughran Jul 22, 2020
0eb7a65
HADOOP-13230 directory marker tests
steveloughran Jul 27, 2020
62dfba3
HADOOP-13230 checkstyle warnings
steveloughran Jul 28, 2020
5e2d025
HADOOP-13230 tests, docs and tuning marker tool
steveloughran Jul 28, 2020
85c92c1
HADOOP-13230. Marker policy tests.
steveloughran Jul 29, 2020
52f7483
HADOOP-13230 Test fixing and rename debug/fix
steveloughran Jul 30, 2020
c3e2a6b
HADOOP-13230 yetus warnings; audit on teardown
steveloughran Aug 1, 2020
5273e87
HADOOP-13230 Test Tuning, docs, audit
steveloughran Aug 3, 2020
6a246ea
HADOOP-13230 getting rename right.
steveloughran Aug 4, 2020
2ed6e13
HADOOP-13230. Tests and MarkerTool.
steveloughran Aug 4, 2020
c9f12eb
HADOOP-13230. MarkerTool -nonauth
steveloughran Aug 5, 2020
54b85e9
HADOOP-13230. MarkerTool. docs and pathCapabilities
steveloughran Aug 11, 2020
11d9d68
HADOOP-13230. bucket-info is marker aware
steveloughran Aug 12, 2020
bbeed7e
HADOOP-13230. marker policy PathCapabilities enhancements
steveloughran Aug 14, 2020
e24d42b
HADOOP-13230. Final doc review
steveloughran Aug 15, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.test;

import java.util.concurrent.Callable;

import org.assertj.core.description.Description;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extra classes to work with AssertJ.
 * These are kept separate from {@link LambdaTestUtils} so there's
 * no requirement for AssertJ to be on the classpath in that broadly
 * used class.
 */
public final class AssertExtensions {

  private static final Logger LOG =
      LoggerFactory.getLogger(AssertExtensions.class);

  /** Utility class: not instantiable. */
  private AssertExtensions() {
  }

  /**
   * A description for AssertJ "describedAs" clauses which evaluates the
   * lambda-expression only on failure. That must return a string
   * or null/"" to be skipped.
   * @param eval lambda expression to invoke
   * @return a description for AssertJ
   */
  public static Description dynamicDescription(Callable<String> eval) {
    return new DynamicDescription(eval);
  }

  /**
   * AssertJ description whose value is computed on demand by a
   * callable, so the (possibly expensive) evaluation only happens
   * when an assertion actually fails.
   */
  private static final class DynamicDescription extends Description {

    /** Expression to evaluate when the description text is needed. */
    private final Callable<String> eval;

    private DynamicDescription(final Callable<String> eval) {
      this.eval = eval;
    }

    /**
     * Evaluate the callable; any exception raised is logged and
     * downgraded to a null description, so the original assertion
     * failure is still reported rather than being masked.
     * @return the evaluated description, or null on evaluation failure
     */
    @Override
    public String value() {
      try {
        return eval.call();
      } catch (Exception e) {
        // parameterized logging; the full stack trace only at debug level
        LOG.warn("Failed to evaluate description: {}", e.toString());
        LOG.debug("Evaluation failure", e);
        // return null so that the description evaluation chain
        // will skip this one
        return null;
      }
    }
  }
}
58 changes: 58 additions & 0 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@
<!-- Set a longer timeout for integration test (in milliseconds) -->
<test.integration.timeout>200000</test.integration.timeout>

<!-- should directory marker retention be audited? -->
<fs.s3a.directory.marker.audit>false</fs.s3a.directory.marker.audit>
<!-- marker retention policy -->
<fs.s3a.directory.marker.retention></fs.s3a.directory.marker.retention>
</properties>

<profiles>
Expand Down Expand Up @@ -123,6 +127,9 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
</systemPropertyVariables>
</configuration>
</plugin>
Expand Down Expand Up @@ -163,6 +170,7 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>

<test.default.timeout>${test.integration.timeout}</test.default.timeout>
</systemPropertyVariables>
Expand All @@ -189,6 +197,8 @@
<exclude>**/ITestDynamoDBMetadataStoreScale.java</exclude>
<!-- Terasort MR jobs spawn enough processes that they use up all RAM -->
<exclude>**/ITestTerasort*.java</exclude>
<!-- Root marker tool tests -->
<exclude>**/ITestMarkerToolRootOperations.java</exclude>
<!-- operations across the metastore -->
<exclude>**/ITestS3GuardDDBRootOperations.java</exclude>
</excludes>
Expand All @@ -215,6 +225,9 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
</systemPropertyVariables>
<!-- Do a sequential run for tests that cannot handle -->
<!-- parallel execution. -->
Expand All @@ -230,6 +243,10 @@
<!-- the local FS. Running them sequentially guarantees isolation -->
<!-- and that they don't conflict with the other MR jobs for RAM -->
<include>**/ITestTerasort*.java</include>
<!-- Root marker tool tests -->
<!-- MUST be run before the other root ops so there's
more likelihood of files in the bucket -->
<include>**/ITestMarkerToolRootOperations.java</include>
<!-- operations across the metastore -->
<include>**/ITestS3AContractRootDir.java</include>
<include>**/ITestS3GuardDDBRootOperations.java</include>
Expand Down Expand Up @@ -269,6 +286,9 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<!-- Markers-->
<fs.s3a.directory.marker.retention>${fs.s3a.directory.marker.retention}</fs.s3a.directory.marker.retention>
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
</systemPropertyVariables>
<forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
</configuration>
Expand Down Expand Up @@ -332,6 +352,44 @@
</properties>
</profile>

<!-- Directory marker retention options, all from the -Dmarkers value-->
<profile>
<id>keep-markers</id>
<activation>
<property>
<name>markers</name>
<value>keep</value>
</property>
</activation>
<properties>
<fs.s3a.directory.marker.retention>keep</fs.s3a.directory.marker.retention>
</properties>
</profile>
<profile>
<id>delete-markers</id>
<activation>
<property>
<name>markers</name>
<value>delete</value>
</property>
</activation>
<properties>
<fs.s3a.directory.marker.retention>delete</fs.s3a.directory.marker.retention>
</properties>
</profile>
<profile>
<id>auth-markers</id>
<activation>
<property>
<name>markers</name>
<value>authoritative</value>
</property>
</activation>
<properties>
<fs.s3a.directory.marker.retention>authoritative</fs.s3a.directory.marker.retention>
</properties>
</profile>

</profiles>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -953,4 +953,92 @@ private Constants() {
* Value: {@value} seconds.
*/
public static final int THREAD_POOL_SHUTDOWN_DELAY_SECONDS = 30;

/**
* Policy for directory markers.
* This is a new feature of HADOOP-13230 which addresses
* some scale, performance and permissions issues -but
* at the risk of backwards compatibility.
*/
public static final String DIRECTORY_MARKER_POLICY =
"fs.s3a.directory.marker.retention";

/**
* Delete directory markers. This is the backwards compatible option.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_DELETE =
"delete";

/**
* Retain directory markers.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_KEEP =
"keep";

/**
* Retain directory markers in authoritative directory trees only.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_AUTHORITATIVE =
"authoritative";

/**
* Default retention policy: {@value}.
*/
public static final String DEFAULT_DIRECTORY_MARKER_POLICY =
DIRECTORY_MARKER_POLICY_DELETE;


/**
* {@code PathCapabilities} probe to verify that an S3A Filesystem
* has the changes needed to safely work with buckets where
* directory markers have not been deleted.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_AWARE
= "fs.s3a.capability.directory.marker.aware";

/**
* {@code PathCapabilities} probe to indicate that the filesystem
* keeps directory markers.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_KEEP
= "fs.s3a.capability.directory.marker.policy.keep";

/**
* {@code PathCapabilities} probe to indicate that the filesystem
* deletes directory markers.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_DELETE
= "fs.s3a.capability.directory.marker.policy.delete";

/**
* {@code PathCapabilities} probe to indicate that the filesystem
* keeps directory markers in authoritative paths only.
* Value: {@value}.
*/
public static final String
STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_AUTHORITATIVE =
"fs.s3a.capability.directory.marker.policy.authoritative";

/**
* {@code PathCapabilities} probe to indicate that a path
* keeps directory markers.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_KEEP
= "fs.s3a.capability.directory.marker.action.keep";

/**
* {@code PathCapabilities} probe to indicate that a path
* deletes directory markers.
* Value: {@value}.
*/
public static final String STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_DELETE
= "fs.s3a.capability.directory.marker.action.delete";

}
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,9 @@ private boolean isDescendant(String parent, String child, boolean recursive) {
} else {
Path actualParentPath = new Path(child).getParent();
Path expectedParentPath = new Path(parent);
return actualParentPath.equals(expectedParentPath);
// children which are directory markers are excluded here
return actualParentPath.equals(expectedParentPath)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to have a boolean var here and return the value - I prefer that for the sake of readability but this will do as well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, the code already did that direct return

&& !child.endsWith("/");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,27 @@ public FileStatusListingIterator createFileStatusListingIterator(
Listing.FileStatusAcceptor acceptor,
RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
return new FileStatusListingIterator(
new ObjectListingIterator(listPath, request),
createObjectListingIterator(listPath, request),
filter,
acceptor,
providedStatus);
}

/**
* Create an object listing iterator against a path, with a given
* list object request.
* @param listPath path of the listing
* @param request initial request to make
* @return the iterator
* @throws IOException IO Problems
*/
@Retries.RetryRaw
public ObjectListingIterator createObjectListingIterator(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this required, will there be some logic in the future other than creating the new object?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mukund is doing a lot of work on listing performance; I'm just trying to keep up with the changes with this and the io statistics patches.

final Path listPath,
final S3ListRequest request) throws IOException {
return new ObjectListingIterator(listPath, request);
}

/**
* Create a located status iterator over a file status iterator.
* @param statusIterator an iterator over the remote status entries
Expand Down Expand Up @@ -194,8 +209,12 @@ public RemoteIterator<S3ALocatedFileStatus> getListFilesAssumingDir(

String key = maybeAddTrailingSlash(pathToKey(path));
String delimiter = recursive ? null : "/";
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
if (recursive) {
LOG.debug("Recursive list of all entries under {}", key);
} else {
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
}
final RemoteIterator<S3AFileStatus> cachedFilesIterator;
final Set<Path> tombstones;
boolean allowAuthoritative = listingOperationCallbacks
Expand Down
Loading