From cfe8d5997a6626a5e8643921f6eebf8ecc8fec34 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 27 Aug 2020 20:06:29 +0100 Subject: [PATCH 1/4] HADOOP-17227. Marker Tool tuning * move from -expect to -min and -max; easier for CLI testing. Plus works * in -nonauth mode, even when policy == keep, files not in an auth path count as failure. * bucket-info option also prints out the authoritative path, so you have more idea what is happening * reporting of command failure more informative The reason for change #2 is a workflow where you want to audit a dir, even though you are in keep mode, and you don't have any auth path. You'd expect -nonauth to say "no auth path", but instead it treats the whole dir as auth. Change-Id: Ib310e321e5862957fbd92bebfade93231f92b16f --- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 5 +- .../hadoop/fs/s3a/tools/MarkerTool.java | 206 +++++++++++++----- .../tools/hadoop-aws/directory_markers.md | 7 +- .../hadoop/fs/s3a/AbstractS3ATestBase.java | 7 +- .../fs/s3a/tools/AbstractMarkerToolTest.java | 4 +- .../hadoop/fs/s3a/tools/ITestMarkerTool.java | 9 +- 6 files changed, 175 insertions(+), 63 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index f89777f730376..415ba06f4c74d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -1463,9 +1463,12 @@ public int run(String[] args, PrintStream out) private void processMarkerOption(final PrintStream out, final S3AFileSystem fs, final String marker) { + println(out, "%nSecurity"); DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy(); String desc = markerPolicy.describe(); - println(out, "%nThe directory marker policy is \"%s\"%n", desc); + println(out, "\tThe directory marker policy is \"%s\"%n", desc); + printOption(out, "\tAuthoritative paths", + AUTHORITATIVE_PATH, ""); DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index 6855c52edbbbb..7bb1fc48f2db1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -53,6 +53,7 @@ import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; +import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.fs.shell.CommandFormat; @@ -113,9 +114,14 @@ public final class MarkerTool extends S3GuardTool { public static final String CLEAN = "-" + OPT_CLEAN; /** - * Expected number of markers to find: {@value}. + * Min number of markers to find: {@value}. */ - public static final String OPT_EXPECTED = "expected"; + public static final String OPT_MIN = "min"; + + /** + * Max number of markers to find: {@value}. + */ + public static final String OPT_MAX = "max"; /** * Name of a file to save the list of markers to: {@value}. @@ -151,13 +157,21 @@ public final class MarkerTool extends S3GuardTool { public static final int UNLIMITED_LISTING = 0; + /** + * Constant to use when there is no minimum number of + * markers: {@value}. + */ + public static final int UNLIMITED_MIN_MARKERS = -1; + + /** * Usage string: {@value}. */ private static final String USAGE = MARKERS + " (-" + OPT_AUDIT + " | -" + OPT_CLEAN + ")" - + " [-" + OPT_EXPECTED + " ]" + + " [-" + OPT_MIN + " ]" + + " [-" + OPT_MAX + " ]" + " [-" + OPT_OUT + " ]" + " [-" + OPT_LIMIT + " ]" + " [-" + OPT_NONAUTH + "]" @@ -195,7 +209,8 @@ public MarkerTool(final Configuration conf) { VERBOSE, OPT_NONAUTH); CommandFormat format = getCommandFormat(); - format.addOptionWithValue(OPT_EXPECTED); + format.addOptionWithValue(OPT_MIN); + format.addOptionWithValue(OPT_MAX); format.addOptionWithValue(OPT_LIMIT); format.addOptionWithValue(OPT_OUT); } @@ -231,8 +246,7 @@ public int run(final String[] args, final PrintStream stream) if (parsedArgs.size() != 1) { errorln(getUsage()); println(out, "Supplied arguments: [" - + parsedArgs.stream() - .collect(Collectors.joining(", ")) + + String.join(", ", parsedArgs) + "]"); throw new ExitUtil.ExitException(EXIT_USAGE, String.format(E_ARGUMENTS, parsedArgs.size())); @@ -241,12 +255,11 @@ public int run(final String[] args, final PrintStream stream) CommandFormat command = getCommandFormat(); verbose = command.getOpt(VERBOSE); - // How many markers are expected? - int expected = 0; - String value = command.getOptValue(OPT_EXPECTED); - if (value != null && !value.isEmpty()) { - expected = Integer.parseInt(value); - } + // minimum number of markers expected + int expectedMin = getOptValue(OPT_MIN, 0); + // max number of markers allowed + int expectedMax = getOptValue(OPT_MAX, 0); + // determine the action boolean audit = command.getOpt(OPT_AUDIT); @@ -258,11 +271,7 @@ public int run(final String[] args, final PrintStream stream) throw new ExitUtil.ExitException(EXIT_USAGE, "Exactly one of " + AUDIT + " and " + CLEAN); } - int limit = UNLIMITED_LISTING; - value = command.getOptValue(OPT_LIMIT); - if (value != null && !value.isEmpty()) { - limit = Integer.parseInt(value); - } + int limit = getOptValue(OPT_LIMIT, UNLIMITED_LISTING); final String dir = parsedArgs.get(0); Path path = new Path(dir); URI uri = path.toUri(); @@ -275,7 +284,8 @@ public int run(final String[] args, final PrintStream stream) fs, path, clean, - expected, + expectedMin, + expectedMax, limit, command.getOpt(OPT_NONAUTH)); if (verbose) { @@ -300,7 +310,30 @@ public int run(final String[] args, final PrintStream stream) IOUtils.writeLines(surplus, "\n", writer); } } - return result.exitCode; + + return result.finish(); + } + + /** + * Get the value of an option, or the default if the option + * is unset/empty. + * @param option option key + * @param defVal default + * @return the value to use + */ + private int getOptValue(String option, int defVal) { + CommandFormat command = getCommandFormat(); + String value = command.getOptValue(option); + if (value != null && !value.isEmpty()) { + try { + return Integer.parseInt(value); + } catch (NumberFormatException e) { + throw new ExitUtil.ExitException(EXIT_USAGE, + String.format("Argument for %s is not a number: %s", option, value)); + } + } else { + return defVal; + } } /** @@ -308,8 +341,9 @@ public int run(final String[] args, final PrintStream stream) * @param sourceFS source FS; must be or wrap an S3A FS. * @param path path to scan. * @param doPurge purge? - * @param expectedMarkerCount expected marker count - * @param limit limit of files to scan; -1 for 'unlimited' + * @param minMarkerCount min marker count (ignored on purge) + * @param maxMarkerCount max marker count (ignored on purge) + * @param limit limit of files to scan; 0 for 'unlimited' * @param nonAuth consider only markers in nonauth paths as errors * @return scan+purge result. * @throws IOException failure @@ -319,7 +353,8 @@ ScanResult execute( final FileSystem sourceFS, final Path path, final boolean doPurge, - final int expectedMarkerCount, + final int minMarkerCount, + final int maxMarkerCount, final int limit, final boolean nonAuth) throws IOException { @@ -360,10 +395,19 @@ ScanResult execute( } // the default filter policy is that all entries should be deleted - DirectoryPolicy filterPolicy = nonAuth - ? activePolicy - : null; - ScanResult result = scan(target, doPurge, expectedMarkerCount, limit, + DirectoryPolicy filterPolicy; + if (nonAuth) { + filterPolicy = new DirectoryPolicyImpl( + DirectoryPolicy.MarkerPolicy.Authoritative, + fs::allowAuthoritative); + } else { + filterPolicy = null; + } + ScanResult result = scan(target, + doPurge, + maxMarkerCount, + minMarkerCount, + limit, filterPolicy); return result; } @@ -378,6 +422,22 @@ public static final class ScanResult { */ private int exitCode; + /** + * Text to include if raising an exception. + */ + private String exitText = ""; + + /** + * Count of all markers found. + */ + private int totalMarkerCount; + + /** + * Count of all markers found after excluding + * any from a [-nonauth] qualification; + */ + private int filteredMarkerCount; + /** * The tracker. */ @@ -395,6 +455,9 @@ private ScanResult() { public String toString() { return "ScanResult{" + "exitCode=" + exitCode + + ", exitText=" + exitText + + ", totalMarkerCount=" + totalMarkerCount + + ", filteredMarkerCount=" + filteredMarkerCount + ", tracker=" + tracker + ", purgeSummary=" + purgeSummary + '}'; @@ -414,13 +477,34 @@ public DirMarkerTracker getTracker() { public MarkerPurgeSummary getPurgeSummary() { return purgeSummary; } + + public int getTotalMarkerCount() { + return totalMarkerCount; + } + + public int getFilteredMarkerCount() { + return filteredMarkerCount; + } + + /** + * Throw an exception if the exit code is non-zero. + * @return 0 if everything is good. + * @throws ExitUtil.ExitException if code != 0 + */ + public int finish() throws ExitUtil.ExitException { + if (exitCode != 0) { + throw new ExitUtil.ExitException(exitCode, exitText); + } + return 0; + } } /** * Do the scan/purge. * @param path path to scan. - * @param clean purge? - * @param expectedMarkerCount expected marker count + * @param doPurge purge rather than just scan/audit? + * @param minMarkerCount min marker count (ignored on purge) + * @param maxMarkerCount max marker count (ignored on purge) * @param limit limit of files to scan; 0 for 'unlimited' * @param filterPolicy filter policy on a nonauth scan; may be null * @return result. @@ -430,8 +514,9 @@ public MarkerPurgeSummary getPurgeSummary() { @Retries.RetryTranslated private ScanResult scan( final Path path, - final boolean clean, - final int expectedMarkerCount, + final boolean doPurge, + final int minMarkerCount, + final int maxMarkerCount, final int limit, final DirectoryPolicy filterPolicy) throws IOException, ExitUtil.ExitException { @@ -458,13 +543,16 @@ private ScanResult scan( = tracker.getSurplusMarkers(); Map leafMarkers = tracker.getLeafMarkers(); - int surplus = surplusMarkers.size(); - if (surplus == 0) { + // determine marker count + int markerCount = surplusMarkers.size(); + result.totalMarkerCount = markerCount; + result.filteredMarkerCount = markerCount; + if (markerCount == 0) { println(out, "No surplus directory markers were found under %s", path); } else { println(out, "Found %d surplus directory marker%s under %s", - surplus, - suffix(surplus), + markerCount, + suffix(markerCount), path); for (Path markers : surplusMarkers.keySet()) { @@ -482,9 +570,9 @@ private ScanResult scan( println(out, "These are required to indicate empty directories"); } - if (clean) { + if (doPurge) { // clean: remove the markers, do not worry about their - // presence when reporting success/failiure + // presence when reporting success/failure int deletePageSize = storeContext.getConfiguration() .getInt(BULK_DELETE_PAGE_SIZE, BULK_DELETE_PAGE_SIZE_DEFAULT); @@ -503,28 +591,41 @@ private ScanResult scan( allowed.forEach(p -> println(out, p.toString())); } // recalculate the marker size - surplus = surplusMarkers.size(); + markerCount = surplusMarkers.size(); + result.filteredMarkerCount = markerCount; } - if (surplus > expectedMarkerCount) { + if (markerCount < minMarkerCount || markerCount > maxMarkerCount) { // failure - if (expectedMarkerCount > 0) { - println(out, "Expected %d marker%s", expectedMarkerCount, - suffix(surplus)); - } - println(out, "Surplus markers were found -failing audit"); - - result.exitCode = EXIT_NOT_ACCEPTABLE; + return failScan(result, EXIT_NOT_ACCEPTABLE, + "Marker count %d out of range " + + "[%d - %d]", + markerCount, minMarkerCount, maxMarkerCount); } } // now one little check for whether a limit was reached. if (!completed) { - println(out, "Listing limit reached before completing the scan"); - result.exitCode = EXIT_INTERRUPTED; + failScan(result, EXIT_INTERRUPTED, + "Listing limit (%d) reached before completing the scan", limit); } return result; } + /** + * Fail the scan; print the formatted error and update the result. + * @param result result to update + * @param code Exit code + * @param message Error message + * @param args arguments for the error message + * @return scan result + */ + private ScanResult failScan(ScanResult result, int code, String message, Object...args) { + String text = String.format(message, args); + result.exitCode = code; + result.exitText = text; + return result; + } + /** * Suffix for plurals. * @param size size to generate a suffix for @@ -702,7 +803,8 @@ public void setVerbose(final boolean verbose) { * @param sourceFS filesystem to use * @param path path to scan * @param doPurge should markers be purged - * @param expectedMarkers number of markers expected + * @param minMarkerCount min marker count (ignored on purge) + * @param maxMarkerCount max marker count (ignored on purge) * @param limit limit of files to scan; -1 for 'unlimited' * @param nonAuth only use nonauth path count for failure rules * @return the result @@ -712,12 +814,14 @@ public static MarkerTool.ScanResult execMarkerTool( final FileSystem sourceFS, final Path path, final boolean doPurge, - final int expectedMarkers, - final int limit, boolean nonAuth) throws IOException { + final int minMarkerCount, + final int maxMarkerCount, + final int limit, + boolean nonAuth) throws IOException { MarkerTool tool = new MarkerTool(sourceFS.getConf()); tool.setVerbose(LOG.isDebugEnabled()); return tool.execute(sourceFS, path, doPurge, - expectedMarkers, limit, nonAuth); + minMarkerCount, maxMarkerCount, limit, nonAuth); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md index e9622ce906d85..53030d633eec6 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/directory_markers.md @@ -366,7 +366,7 @@ Syntax ``` > hadoop s3guard markers -verbose -nonauth -markers (-audit | -clean) [-expected ] [-out ] [-limit ] [-nonauth] [-verbose] +markers (-audit | -clean) [-min ] [-max ] [-out ] [-limit ] [-nonauth] [-verbose] View and manipulate S3 directory markers ``` @@ -377,7 +377,8 @@ markers (-audit | -clean) [-expected ] [-out ] [-limit ] |-------------------------|-------------------------| | `-audit` | Audit the path for surplus markers | | `-clean` | Clean all surplus markers under a path | -| `-expected ]` | Expected number of markers to find (primarily for testing) | +| `-min ` | Minimum number of markers an audit must find (default: 0) | +| `-max ]` | Minimum number of markers an audit must find (default: 0) | | `-limit ]` | Limit the number of objects to scan | | `-nonauth` | Only consider markers in non-authoritative paths as errors | | `-out ` | Save a list of all markers found to the nominated file | @@ -489,7 +490,7 @@ The `markers clean` command will clean the directory tree of all surplus markers The `-verbose` option prints more detail on the operation as well as some IO statistics ``` -> hadoop s3guard markers -verbose -clean s3a://london/ +> hadoop s3guard markers -clean -verbose s3a://london/ 2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths The directory marker policy of s3a://london is "Authoritative" diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java index a2ee9ea5f7b29..1f6735bc978c3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java @@ -91,9 +91,12 @@ public void maybeAuditTestPath() { .keepDirectoryMarkers(methodPath) && fs.isDirectory(methodPath)) { MarkerTool.ScanResult result = MarkerTool.execMarkerTool(fs, - methodPath, true, 0, UNLIMITED_LISTING, false); - assertEquals("Audit of " + methodPath + " failed: " + result, + methodPath, true, 0, 0, UNLIMITED_LISTING, false); + final String resultStr = result.toString(); + assertEquals("Audit of " + methodPath + " failed: " + resultStr, 0, result.getExitCode()); + assertEquals("Marker Count under " + methodPath + " non-zero: " + resultStr, + 0, result.getFilteredMarkerCount()); } } catch (FileNotFoundException ignored) { } catch (Exception e) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java index 00e62d9491070..97a4905fb60e9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java @@ -313,7 +313,9 @@ public static MarkerTool.ScanResult markerTool( path, doPurge, expectedMarkers, - limit, nonAuth); + expectedMarkers, + limit, + nonAuth); Assertions.assertThat(result.getExitCode()) .describedAs("Exit code of marker(%s, %s, %d) -> %s", path, doPurge, expectedMarkers, result) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java index 4a81b1aba919b..4c11a3389f132 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java @@ -259,7 +259,8 @@ public void testRunAuditWithExpectedMarkers() throws Throwable { AUDIT, m(OPT_LIMIT), 0, m(OPT_OUT), audit, - m(OPT_EXPECTED), expectedMarkersWithBaseDir, + m(OPT_MIN), expectedMarkersWithBaseDir, + m(OPT_MAX), expectedMarkersWithBaseDir, createdPaths.base); expectMarkersInOutput(audit, expectedMarkersWithBaseDir); } @@ -286,9 +287,6 @@ public void testRunLimitedAudit() throws Throwable { m(OPT_LIMIT), 2, CLEAN, createdPaths.base); - run(MARKERS, V, - AUDIT, - createdPaths.base); } /** @@ -302,7 +300,8 @@ public void testRunLimitedLandsatAudit() throws Throwable { describe("Audit a few thousand landsat objects"); final File audit = tempAuditFile(); - run(MARKERS, + runToFailure(EXIT_INTERRUPTED, + MARKERS, AUDIT, m(OPT_LIMIT), 3000, m(OPT_OUT), audit, From 5a66f87e1646dac7e388be1b575f281c4ba16986 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 28 Aug 2020 15:56:28 +0100 Subject: [PATCH 2/4] HADOOP-17227 bit more formatting tweaking Change-Id: Iddcefb26a7de0fce0c7b6ae0d679590005cd63b6 --- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 415ba06f4c74d..af9f1ff1597af 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -25,6 +25,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.file.AccessDeniedException; +import java.time.Duration; import java.util.Arrays; import java.util.Collection; import java.util.Date; @@ -46,6 +47,7 @@ import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DurationFormatUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -758,7 +760,7 @@ public int run(String[] args, PrintStream out) throws Exception { */ static class Destroy extends S3GuardTool { public static final String NAME = "destroy"; - public static final String PURPOSE = "destroy Metadata Store data " + public static final String PURPOSE = "destroy the Metadata Store including its contents" + DATA_IN_S3_IS_PRESERVED; private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + "\t" + PURPOSE + "\n\n" + @@ -1252,7 +1254,7 @@ public static class BucketInfo extends S3GuardTool { @VisibleForTesting public static final String IS_MARKER_AWARE = - "The S3A connector is compatible with buckets where" + "\tThe S3A connector is compatible with buckets where" + " directory markers are not deleted"; public BucketInfo(Configuration conf) { @@ -1328,8 +1330,9 @@ public int run(String[] args, PrintStream out) authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false); final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL, DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS); - println(out, "\tMetadata time to live: %s=%s milliseconds", - METADATASTORE_METADATA_TTL, ttl); + println(out, "\tMetadata time to live: (set in %s) = %s", + METADATASTORE_METADATA_TTL, + DurationFormatUtils.formatDurationHMS(ttl)); printStoreDiagnostics(out, store); } else { println(out, "Filesystem %s is not using S3Guard", fsUri); @@ -1466,10 +1469,15 @@ private void processMarkerOption(final PrintStream out, println(out, "%nSecurity"); DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy(); String desc = markerPolicy.describe(); - println(out, "\tThe directory marker policy is \"%s\"%n", desc); + println(out, "\tThe directory marker policy is \"%s\"", desc); + + String pols = DirectoryPolicyImpl.availablePolicies() + .stream() + .map(DirectoryPolicy.MarkerPolicy::getOptionName) + .collect(Collectors.joining(", ")); + println(out, "\tAvailable Policies: %s", pols); printOption(out, "\tAuthoritative paths", AUTHORITATIVE_PATH, ""); - DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy(); String desiredMarker = marker == null @@ -1481,12 +1489,6 @@ private void processMarkerOption(final PrintStream out, // simple awareness test -provides a way to validate compatibility // on the command line println(out, IS_MARKER_AWARE); - String pols = DirectoryPolicyImpl.availablePolicies() - .stream() - .map(DirectoryPolicy.MarkerPolicy::getOptionName) - .collect(Collectors.joining(", ")); - println(out, "Available Policies: %s", pols); - } else { // compare with current policy if (!optionName.equalsIgnoreCase(desiredMarker)) { From 1887f333d88ab02feae3b7caefcbfa540a2052f7 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 2 Sep 2020 15:53:20 +0100 Subject: [PATCH 3/4] HADOOP-17227. Marker Tool tuning * fix checkstyle * use bulder API for passing (Growing) set of params around Change-Id: I1ce980a4d7d4f5e9ad7f1c7b7fa4c6fd9806b8f1 --- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 5 +- .../hadoop/fs/s3a/tools/MarkerTool.java | 253 ++++++++++++++---- .../hadoop/fs/s3a/AbstractS3ATestBase.java | 18 +- .../fs/s3a/tools/AbstractMarkerToolTest.java | 17 +- 4 files changed, 228 insertions(+), 65 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index af9f1ff1597af..1aa310dc043f1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -25,7 +25,6 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.file.AccessDeniedException; -import java.time.Duration; import java.util.Arrays; import java.util.Collection; import java.util.Date; @@ -760,8 +759,8 @@ public int run(String[] args, PrintStream out) throws Exception { */ static class Destroy extends S3GuardTool { public static final String NAME = "destroy"; - public static final String PURPOSE = "destroy the Metadata Store including its contents" - + DATA_IN_S3_IS_PRESERVED; + public static final String PURPOSE = "destroy the Metadata Store including its" + + " contents" + DATA_IN_S3_IS_PRESERVED; private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + "\t" + PURPOSE + "\n\n" + "Common options:\n" + diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index 7bb1fc48f2db1..c18ba3d3f4ea5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -280,14 +280,17 @@ public int run(final String[] args, final PrintStream stream) path = new Path(path, "/"); } FileSystem fs = path.getFileSystem(getConf()); + boolean nonAuth = command.getOpt(OPT_NONAUTH); ScanResult result = execute( - fs, - path, - clean, - expectedMin, - expectedMax, - limit, - command.getOpt(OPT_NONAUTH)); + new ScanArgsBuilder() + .withSourceFS(fs) + .withPath(path) + .withDoPurge(clean) + .withMinMarkerCount(expectedMin) + .withMaxMarkerCount(expectedMax) + .withLimit(limit) + .withNonAuth(nonAuth) + .build()); if (verbose) { dumpFileSystemStatistics(out); } @@ -329,7 +332,8 @@ private int getOptValue(String option, int defVal) { return Integer.parseInt(value); } catch (NumberFormatException e) { throw new ExitUtil.ExitException(EXIT_USAGE, - String.format("Argument for %s is not a number: %s", option, value)); + String.format("Argument for %s is not a number: %s", + option, value)); } } else { return defVal; @@ -338,27 +342,14 @@ private int getOptValue(String option, int defVal) { /** * Execute the scan/purge. - * @param sourceFS source FS; must be or wrap an S3A FS. - * @param path path to scan. - * @param doPurge purge? - * @param minMarkerCount min marker count (ignored on purge) - * @param maxMarkerCount max marker count (ignored on purge) - * @param limit limit of files to scan; 0 for 'unlimited' - * @param nonAuth consider only markers in nonauth paths as errors - * @return scan+purge result. + * + * @param scanArgs@return scan+purge result. * @throws IOException failure */ @VisibleForTesting - ScanResult execute( - final FileSystem sourceFS, - final Path path, - final boolean doPurge, - final int minMarkerCount, - final int maxMarkerCount, - final int limit, - final boolean nonAuth) + ScanResult execute(final ScanArgs scanArgs) throws IOException { - S3AFileSystem fs = bindFilesystem(sourceFS); + S3AFileSystem fs = bindFilesystem(scanArgs.getSourceFS()); // extract the callbacks needed for the rest of the work storeContext = fs.createStoreContext(); @@ -379,6 +370,7 @@ ScanResult execute( println(out, "Authoritative path list is \"%s\"", authPath); } // qualify the path + Path path = scanArgs.getPath(); Path target = path.makeQualified(fs.getUri(), new Path("/")); // initial safety check: does the path exist? try { @@ -396,7 +388,7 @@ ScanResult execute( // the default filter policy is that all entries should be deleted DirectoryPolicy filterPolicy; - if (nonAuth) { + if (scanArgs.isNonAuth()) { filterPolicy = new DirectoryPolicyImpl( DirectoryPolicy.MarkerPolicy.Authoritative, fs::allowAuthoritative); @@ -404,10 +396,10 @@ ScanResult execute( filterPolicy = null; } ScanResult result = scan(target, - doPurge, - maxMarkerCount, - minMarkerCount, - limit, + scanArgs.isDoPurge(), + scanArgs.getMaxMarkerCount(), + scanArgs.getMinMarkerCount(), + scanArgs.getLimit(), filterPolicy); return result; } @@ -434,7 +426,7 @@ public static final class ScanResult { /** * Count of all markers found after excluding - * any from a [-nonauth] qualification; + * any from a [-nonauth] qualification. */ private int filteredMarkerCount; @@ -619,7 +611,11 @@ private ScanResult scan( * @param args arguments for the error message * @return scan result */ - private ScanResult failScan(ScanResult result, int code, String message, Object...args) { + private ScanResult failScan( + ScanResult result, + int code, + String message, + Object...args) { String text = String.format(message, args); result.exitCode = code; result.exitText = text; @@ -688,7 +684,7 @@ private boolean scanDirectoryTree( * Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)}; * included in {@link ScanResult} so must share visibility. */ - static final class MarkerPurgeSummary { + public static final class MarkerPurgeSummary { /** Number of markers deleted. */ private int markersDeleted; @@ -714,14 +710,26 @@ public String toString() { } + /** + * Count of markers deleted. + * @return a number, zero when prune==false. + */ int getMarkersDeleted() { return markersDeleted; } + /** + * Count of bulk delete requests issued. + * @return count of calls made to S3. + */ int getDeleteRequests() { return deleteRequests; } + /** + * Total duration of delete requests. + * @return a time interval in millis. + */ long getTotalDeleteRequestDuration() { return totalDeleteRequestDuration; } @@ -800,28 +808,173 @@ public void setVerbose(final boolean verbose) { /** * Execute the marker tool, with no checks on return codes. * - * @param sourceFS filesystem to use - * @param path path to scan - * @param doPurge should markers be purged - * @param minMarkerCount min marker count (ignored on purge) - * @param maxMarkerCount max marker count (ignored on purge) - * @param limit limit of files to scan; -1 for 'unlimited' - * @param nonAuth only use nonauth path count for failure rules + * @param scanArgs set of args for the scanner. * @return the result */ @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") public static MarkerTool.ScanResult execMarkerTool( - final FileSystem sourceFS, - final Path path, - final boolean doPurge, - final int minMarkerCount, - final int maxMarkerCount, - final int limit, - boolean nonAuth) throws IOException { - MarkerTool tool = new MarkerTool(sourceFS.getConf()); + ScanArgs scanArgs) throws IOException { + MarkerTool tool = new MarkerTool(scanArgs.getSourceFS().getConf()); tool.setVerbose(LOG.isDebugEnabled()); - return tool.execute(sourceFS, path, doPurge, - minMarkerCount, maxMarkerCount, limit, nonAuth); + return tool.execute(scanArgs); + } + + /** + * Arguments for the scan. + *

+ * Uses a builder/argument object because too many arguments were + * being created and it was making maintenance harder. + */ + public static final class ScanArgs { + + /** Source FS; must be or wrap an S3A FS. */ + private final FileSystem sourceFS; + + /** Path to scan. */ + private final Path path; + + /** Purge? */ + private final boolean doPurge; + + /** Min marker count (ignored on purge). */ + private final int minMarkerCount; + + /** Max marker count (ignored on purge). */ + private final int maxMarkerCount; + + /** Limit of files to scan; 0 for 'unlimited'. */ + private final int limit; + + /** Consider only markers in nonauth paths as errors. */ + private final boolean nonAuth; + + /** + * @param sourceFS source FS; must be or wrap an S3A FS. + * @param path path to scan. + * @param doPurge purge? + * @param minMarkerCount min marker count (ignored on purge) + * @param maxMarkerCount max marker count (ignored on purge) + * @param limit limit of files to scan; 0 for 'unlimited' + * @param nonAuth consider only markers in nonauth paths as errors + */ + private ScanArgs(final FileSystem sourceFS, + final Path path, + final boolean doPurge, + final int minMarkerCount, + final int maxMarkerCount, + final int limit, + final boolean nonAuth) { + this.sourceFS = sourceFS; + this.path = path; + this.doPurge = doPurge; + this.minMarkerCount = minMarkerCount; + this.maxMarkerCount = maxMarkerCount; + this.limit = limit; + this.nonAuth = nonAuth; + } + + FileSystem getSourceFS() { + return sourceFS; + } + + Path getPath() { + return path; + } + + boolean isDoPurge() { + return doPurge; + } + + int getMinMarkerCount() { + return minMarkerCount; + } + + int getMaxMarkerCount() { + return maxMarkerCount; + } + + int getLimit() { + return limit; + } + + boolean isNonAuth() { + return nonAuth; + } + } + + /** + * Builder of the scan arguments. + */ + public static final class ScanArgsBuilder { + + /** Source FS; must be or wrap an S3A FS. */ + private FileSystem sourceFS; + + /** Path to scan. */ + private Path path; + + /** Purge? */ + private boolean doPurge = false; + + /** Min marker count (ignored on purge). */ + private int minMarkerCount = 0; + + /** Max marker count (ignored on purge). */ + private int maxMarkerCount = 0; + + /** Limit of files to scan; 0 for 'unlimited'. */ + private int limit = UNLIMITED_LISTING; + + /** Consider only markers in nonauth paths as errors. */ + private boolean nonAuth = false; + + /** Source FS; must be or wrap an S3A FS. */ + public ScanArgsBuilder withSourceFS(final FileSystem sourceFS) { + this.sourceFS = sourceFS; + return this; + } + + /** Path to scan. */ + public ScanArgsBuilder withPath(final Path path) { + this.path = path; + return this; + } + + /** Purge? */ + public ScanArgsBuilder withDoPurge(final boolean doPurge) { + this.doPurge = doPurge; + return this; + } + + /** Min marker count (ignored on purge). */ + public ScanArgsBuilder withMinMarkerCount(final int minMarkerCount) { + this.minMarkerCount = minMarkerCount; + return this; + } + + /** Max marker count (ignored on purge). */ + public ScanArgsBuilder withMaxMarkerCount(final int maxMarkerCount) { + this.maxMarkerCount = maxMarkerCount; + return this; + } + + /** Limit of files to scan; 0 for 'unlimited'. */ + public ScanArgsBuilder withLimit(final int limit) { + this.limit = limit; + return this; + } + + /** Consider only markers in nonauth paths as errors. */ + public ScanArgsBuilder withNonAuth(final boolean nonAuth) { + this.nonAuth = nonAuth; + return this; + } + + public ScanArgs build() { + return new ScanArgs(sourceFS, path, doPurge, minMarkerCount, + maxMarkerCount, + limit, nonAuth); + } } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java index 1f6735bc978c3..da679cdecad78 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java @@ -90,12 +90,22 @@ public void maybeAuditTestPath() { && !fs.getDirectoryMarkerPolicy() .keepDirectoryMarkers(methodPath) && fs.isDirectory(methodPath)) { - MarkerTool.ScanResult result = MarkerTool.execMarkerTool(fs, - methodPath, true, 0, 0, UNLIMITED_LISTING, false); + MarkerTool.ScanResult result = MarkerTool.execMarkerTool( + new MarkerTool.ScanArgsBuilder() + .withSourceFS(fs) + .withPath(methodPath) + .withDoPurge(true) + .withMinMarkerCount(0) + .withMaxMarkerCount(0) + .withLimit(UNLIMITED_LISTING) + .withNonAuth(false) + .build()); final String resultStr = result.toString(); - assertEquals("Audit of " + methodPath + " failed: " + resultStr, + assertEquals("Audit of " + methodPath + " failed: " + + resultStr, 0, result.getExitCode()); - assertEquals("Marker Count under " + methodPath + " non-zero: " + resultStr, + assertEquals("Marker Count under " + methodPath + + " non-zero: " + resultStr, 0, result.getFilteredMarkerCount()); } } catch (FileNotFoundException ignored) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java index 97a4905fb60e9..797d33c8d90d7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java @@ -309,13 +309,15 @@ public static MarkerTool.ScanResult markerTool( final boolean nonAuth) throws IOException { MarkerTool.ScanResult result = MarkerTool.execMarkerTool( - sourceFS, - path, - doPurge, - expectedMarkers, - expectedMarkers, - limit, - nonAuth); + new MarkerTool.ScanArgsBuilder() + .withSourceFS(sourceFS) + .withPath(path) + .withDoPurge(doPurge) + .withMinMarkerCount(expectedMarkers) + .withMaxMarkerCount(expectedMarkers) + .withLimit(limit) + .withNonAuth(nonAuth) + .build()); Assertions.assertThat(result.getExitCode()) .describedAs("Exit code of marker(%s, %s, %d) -> %s", path, doPurge, expectedMarkers, result) @@ -332,5 +334,4 @@ protected static String m(String s) { return "-" + s; } - } From 6c04b40f4258530d478db850157a25d0e25f822d Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 3 Sep 2020 18:35:53 +0100 Subject: [PATCH 4/4] HADOOP-17227. Fix checkstyles. Change-Id: I49cc69ad61601fd858005323e73ae5ad7178e82e --- .../hadoop/fs/s3a/tools/MarkerTool.java | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java index c18ba3d3f4ea5..f8172233b6a35 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java @@ -930,51 +930,59 @@ public static final class ScanArgsBuilder { private boolean nonAuth = false; /** Source FS; must be or wrap an S3A FS. */ - public ScanArgsBuilder withSourceFS(final FileSystem sourceFS) { - this.sourceFS = sourceFS; + public ScanArgsBuilder withSourceFS(final FileSystem source) { + this.sourceFS = source; return this; } /** Path to scan. */ - public ScanArgsBuilder withPath(final Path path) { - this.path = path; + public ScanArgsBuilder withPath(final Path p) { + this.path = p; return this; } /** Purge? */ - public ScanArgsBuilder withDoPurge(final boolean doPurge) { - this.doPurge = doPurge; + public ScanArgsBuilder withDoPurge(final boolean d) { + this.doPurge = d; return this; } /** Min marker count (ignored on purge). */ - public ScanArgsBuilder withMinMarkerCount(final int minMarkerCount) { - this.minMarkerCount = minMarkerCount; + public ScanArgsBuilder withMinMarkerCount(final int min) { + this.minMarkerCount = min; return this; } /** Max marker count (ignored on purge). */ - public ScanArgsBuilder withMaxMarkerCount(final int maxMarkerCount) { - this.maxMarkerCount = maxMarkerCount; + public ScanArgsBuilder withMaxMarkerCount(final int max) { + this.maxMarkerCount = max; return this; } /** Limit of files to scan; 0 for 'unlimited'. */ - public ScanArgsBuilder withLimit(final int limit) { - this.limit = limit; + public ScanArgsBuilder withLimit(final int l) { + this.limit = l; return this; } /** Consider only markers in nonauth paths as errors. */ - public ScanArgsBuilder withNonAuth(final boolean nonAuth) { - this.nonAuth = nonAuth; + public ScanArgsBuilder withNonAuth(final boolean b) { + this.nonAuth = b; return this; } + /** + * Build the actual argument instance. + * @return the arguments to pass in + */ public ScanArgs build() { - return new ScanArgs(sourceFS, path, doPurge, minMarkerCount, + return new ScanArgs(sourceFS, + path, + doPurge, + minMarkerCount, maxMarkerCount, - limit, nonAuth); + limit, + nonAuth); } } }