From 88517037fb326988aa2b0857620e81b3a013b37c Mon Sep 17 00:00:00 2001 From: Mohanad Elsafty Date: Wed, 9 Feb 2022 14:48:25 +0800 Subject: [PATCH 1/5] HADOOP-18117. Add an option to preserve root directory permissions --- .../apache/hadoop/tools/DistCpConstants.java | 2 ++ .../apache/hadoop/tools/DistCpContext.java | 4 ++++ .../hadoop/tools/DistCpOptionSwitch.java | 5 ++++- .../apache/hadoop/tools/DistCpOptions.java | 19 +++++++++++++++++++ .../apache/hadoop/tools/OptionsParser.java | 4 +++- .../hadoop/tools/SimpleCopyListing.java | 7 +++++-- .../hadoop/tools/mapred/CopyCommitter.java | 9 +++++++-- .../src/site/markdown/DistCp.md.vm | 1 + .../hadoop/tools/TestDistCpOptions.java | 13 ++++++++++++- .../hadoop/tools/TestOptionsParser.java | 14 ++++++++++++++ 10 files changed, 71 insertions(+), 7 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index c75c0e85dd791..95a53c3a2f0b2 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -86,6 +86,8 @@ private DistCpConstants() { public static final String CONF_LABEL_SPLIT_RATIO = "distcp.dynamic.split.ratio"; public static final String CONF_LABEL_DIRECT_WRITE = "distcp.direct.write"; + public static final String CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES = + "distcp.update.root.directory.attributes"; /* Total bytes to be copied. Updated by copylisting. Unfiltered count */ public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected"; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java index 8e9d64a376e9f..770ff44ba0cdd 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java @@ -175,6 +175,10 @@ public boolean shouldUseIterator() { return options.shouldUseIterator(); } + public boolean shouldUpdateRootDirectoryAttributes() { + return options.shouldUpdateRootDirectoryAttributes(); + } + public final boolean splitLargeFile() { return options.getBlocksPerChunk() > 0; } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index 4163f8274d967..bf579071670ad 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -244,8 +244,11 @@ public enum DistCpOptionSwitch { USE_ITERATOR(DistCpConstants.CONF_LABEL_USE_ITERATOR, new Option("useiterator", false, "Use single threaded list status iterator to build " - + "the listing to save the memory utilisation at the client")); + + "the listing to save the memory utilisation at the client")), + UPDATE_ROOT_DIRECTORY_ATTRIBUTES(DistCpConstants.CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES, + new Option("updateRootDirectoryAttributes", false, + "Update root directory attributes (eg permissions, ownership ...)")); public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct"; private final String confLabel; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index fee53ec1d620b..8fdfba76b24a3 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -162,6 +162,8 @@ public final class DistCpOptions { private final boolean useIterator; + private final boolean updateRootDirectoryAttributes; + /** * File attributes for preserve. * @@ -228,6 +230,8 @@ private DistCpOptions(Builder builder) { this.directWrite = builder.directWrite; this.useIterator = builder.useIterator; + + this.updateRootDirectoryAttributes = builder.updateRootDirectoryAttributes; } public Path getSourceFileListing() { @@ -374,6 +378,10 @@ public boolean shouldUseIterator() { return useIterator; } + public boolean shouldUpdateRootDirectoryAttributes() { + return updateRootDirectoryAttributes; + } + /** * Add options to configuration. These will be used in the Mapper/committer * @@ -427,6 +435,9 @@ public void appendToConf(Configuration conf) { DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.USE_ITERATOR, String.valueOf(useIterator)); + + DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES, + String.valueOf(updateRootDirectoryAttributes)); } /** @@ -465,6 +476,7 @@ public String toString() { ", verboseLog=" + verboseLog + ", directWrite=" + directWrite + ", useiterator=" + useIterator + + ", updateRootDirectoryAttributes=" + updateRootDirectoryAttributes + '}'; } @@ -518,6 +530,8 @@ public static class Builder { private boolean useIterator = false; + private boolean updateRootDirectoryAttributes = false; + public Builder(List sourcePaths, Path targetPath) { Preconditions.checkArgument(sourcePaths != null && !sourcePaths.isEmpty(), "Source paths should not be null or empty!"); @@ -780,6 +794,11 @@ public Builder withUseIterator(boolean useItr) { this.useIterator = useItr; return this; } + + public Builder withUpdateRootDirectoryAttributes(boolean updateRootDirectoryAttributes) { + this.updateRootDirectoryAttributes = updateRootDirectoryAttributes; + return this; + } } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index 476b57b9959c4..2f9cddea316a6 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -117,7 +117,9 @@ public static DistCpOptions parse(String[] args) .withDirectWrite( command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch())) .withUseIterator( - command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch())); + command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch())) + .withUpdateRootDirectoryAttributes( + command.hasOption(DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES.getSwitch())); if (command.hasOption(DistCpOptionSwitch.DIFF.getSwitch())) { String[] snapshots = getVals(command, diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index e9eb4c0923657..b494f605f4ec2 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -616,10 +616,13 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, DistCpContext context) throws IOException { boolean syncOrOverwrite = context.shouldSyncFolder() || context.shouldOverwrite(); + boolean skipRootPath = syncOrOverwrite && + !context.shouldUpdateRootDirectoryAttributes(); for (CopyListingFileStatus fs : fileStatus) { if (fs.getPath().equals(sourcePathRoot) && - fs.isDirectory() && syncOrOverwrite) { - // Skip the root-paths when syncOrOverwrite + fs.isDirectory() && skipRootPath) { + // Skip the root-paths when skipRootPath (syncOrOverwrite and + // update root directory is not a must). LOG.debug("Skip {}", fs.getPath()); return; } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index 2272781f72476..a151d584b4872 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -75,6 +75,7 @@ public class CopyCommitter extends FileOutputCommitter { private boolean ignoreFailures = false; private boolean skipCrc = false; private int blocksPerChunk = 0; + private boolean updateRootDirectoryAttributes = false; /** * Create a output committer @@ -100,6 +101,8 @@ public void commitJob(JobContext jobContext) throws IOException { Configuration conf = jobContext.getConfiguration(); syncFolder = conf.getBoolean(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, false); overwrite = conf.getBoolean(DistCpConstants.CONF_LABEL_OVERWRITE, false); + updateRootDirectoryAttributes = + conf.getBoolean(CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES, false); targetPathExists = conf.getBoolean( DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, true); ignoreFailures = conf.getBoolean( @@ -336,9 +339,11 @@ private void preserveFileAttributesForDirectories(Configuration conf) Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath); // - // Skip the root folder when syncOrOverwrite is true. + // Skip the root folder when skipRootDirectoryAttributes is true. // - if (targetRoot.equals(targetFile) && syncOrOverwrite) continue; + boolean skipRootDirectoryAttributes = + syncOrOverwrite && !updateRootDirectoryAttributes; + if (targetRoot.equals(targetFile) && skipRootDirectoryAttributes) continue; FileSystem targetFS = targetFile.getFileSystem(conf); DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes, diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm index 136b6c8ca1339..4ff44b3d7c644 100644 --- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm +++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm @@ -363,6 +363,7 @@ Command Line Options | `-xtrack ` | Save information about missing source files to the specified path. | This option is only valid with `-update` option. This is an experimental property and it cannot be used with `-atomic` option. | | `-direct` | Write directly to destination paths | Useful for avoiding potentially very expensive temporary file rename operations when the destination is an object store | | `-useiterator` | Uses single threaded listStatusIterator to build listing | Useful for saving memory at the client side. Using this option will ignore the numListstatusThreads option | +| `-updateRootDirectoryAttributes` | Update root directory attributes (eg permissions, ownership ...) | Useful if you need to enforce root directory attributes update when using distcp | Architecture of DistCp ---------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java index 13497029a0767..034dad350aa8c 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java @@ -289,7 +289,7 @@ public void testToString() { "atomicWorkPath=null, logPath=null, sourceFileListing=abc, " + "sourcePaths=null, targetPath=xyz, filtersFile='null', " + "blocksPerChunk=0, copyBufferSize=8192, verboseLog=false, " + - "directWrite=false, useiterator=false}"; + "directWrite=false, useiterator=false, updateRootDirectoryAttributes=false}"; String optionString = option.toString(); Assert.assertEquals(val, optionString); Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(), @@ -563,4 +563,15 @@ public void testAppendToConf() { "otherwise it may not be fetched properly", expectedValForEmptyConfigKey, config.get("")); } + + @Test + public void testUpdateRootDirectoryAttributes() { + final DistCpOptions options = new DistCpOptions.Builder( + Collections.singletonList( + new Path("hdfs://localhost:8020/source")), + new Path("hdfs://localhost:8020/target/")) + .withUpdateRootDirectoryAttributes(true) + .build(); + Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes()); + } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java index 85b312a94b52f..3eb895ab074a0 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java @@ -804,4 +804,18 @@ public void testExclusionsOption() { "hdfs://localhost:8020/target/"}); assertThat(options.getFiltersFile()).isEqualTo("/tmp/filters.txt"); } + + @Test + public void testParseUpdateRootDirectoryAttributes() { + DistCpOptions options = OptionsParser.parse(new String[] { + "hdfs://localhost:8020/source/first", + "hdfs://localhost:8020/target/"}); + Assert.assertFalse(options.shouldUpdateRootDirectoryAttributes()); + + options = OptionsParser.parse(new String[] { + "-updateRootDirectoryAttributes", + "hdfs://localhost:8020/source/first", + "hdfs://localhost:8020/target/"}); + Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes()); + } } From 7ffaf20f5072cbcc80ef2256e24de61320ef5a9e Mon Sep 17 00:00:00 2001 From: Mohanad Elsafty Date: Thu, 10 Feb 2022 10:16:57 +0800 Subject: [PATCH 2/5] HADOOP-18117. Add more unittest --- .../apache/hadoop/tools/TestDistCpSystem.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index 47b850f4ba3e2..e67cafad37522 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.test.GenericTestUtils.getMethodName; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -44,6 +45,8 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.tools.util.DistCpTestUtils; import org.apache.hadoop.util.ToolRunner; import org.junit.AfterClass; import org.junit.Assert; @@ -551,4 +554,41 @@ public void testSourceRoot() throws Exception { String[] args2 = new String[]{rootStr, tgtStr2}; Assert.assertThat(ToolRunner.run(conf, new DistCp(), args2), is(0)); } + + @Test + public void testUpdateRootDirectoryAttributes() throws Exception { + FileSystem fs = cluster.getFileSystem(); + + Path source = new Path("/src"); + Path dest1 = new Path("/dest1"); + Path dest2 = new Path("/dest2"); + + fs.delete(source, true); + fs.delete(dest1, true); + fs.delete(dest2, true); + + // Create a source dir + fs.mkdirs(source); + fs.setOwner(source,"userA", "groupA"); + fs.setTimes(source, new Random().nextLong(), new Random().nextLong()); + + GenericTestUtils.createFiles(fs, source, 3, 5, 5); + + // should not preserve attrs + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest1.toString(), "-p -update", conf); + + FileStatus srcStatus = fs.getFileStatus(source); + FileStatus destStatus1 = fs.getFileStatus(dest1); + assertNotEquals(destStatus1.getOwner(), srcStatus.getOwner()); + assertNotEquals(destStatus1.getModificationTime(), srcStatus.getModificationTime()); + + // should preserve attrs + DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), + dest2.toString(), "-p -update -updateRootDirectoryAttributes", conf); + + FileStatus destStatus2 = fs.getFileStatus(dest2); + assertEquals(destStatus2.getOwner(), srcStatus.getOwner()); + assertEquals(destStatus2.getModificationTime(), srcStatus.getModificationTime()); + } } \ No newline at end of file From 2ce6d59fbfdb944b0ac8c06eb6e754cec047d998 Mon Sep 17 00:00:00 2001 From: Mohanad Elsafty Date: Thu, 10 Feb 2022 12:38:05 +0800 Subject: [PATCH 3/5] HADOOP-18117. fix yetus checkstyle --- .../src/main/java/org/apache/hadoop/tools/DistCpOptions.java | 4 ++-- .../java/org/apache/hadoop/tools/mapred/CopyCommitter.java | 4 +++- .../test/java/org/apache/hadoop/tools/TestDistCpSystem.java | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index 8fdfba76b24a3..5c2bc39f2e892 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -795,8 +795,8 @@ public Builder withUseIterator(boolean useItr) { return this; } - public Builder withUpdateRootDirectoryAttributes(boolean updateRootDirectoryAttributes) { - this.updateRootDirectoryAttributes = updateRootDirectoryAttributes; + public Builder withUpdateRootDirectoryAttributes(boolean updateRootDirectoryAttrs) { + this.updateRootDirectoryAttributes = updateRootDirectoryAttrs; return this; } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index a151d584b4872..9fc1f81858487 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -343,7 +343,9 @@ private void preserveFileAttributesForDirectories(Configuration conf) // boolean skipRootDirectoryAttributes = syncOrOverwrite && !updateRootDirectoryAttributes; - if (targetRoot.equals(targetFile) && skipRootDirectoryAttributes) continue; + if (targetRoot.equals(targetFile) && skipRootDirectoryAttributes) { + continue; + } FileSystem targetFS = targetFile.getFileSystem(conf); DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes, diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index e67cafad37522..7fe763a89c0b1 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -569,7 +569,7 @@ public void testUpdateRootDirectoryAttributes() throws Exception { // Create a source dir fs.mkdirs(source); - fs.setOwner(source,"userA", "groupA"); + fs.setOwner(source, "userA", "groupA"); fs.setTimes(source, new Random().nextLong(), new Random().nextLong()); GenericTestUtils.createFiles(fs, source, 3, 5, 5); From 4848bd860fcdf7cdcfc05ec499dfaec480dad89d Mon Sep 17 00:00:00 2001 From: Author Mohanad Elsafty Date: Mon, 14 Feb 2022 20:17:35 +0800 Subject: [PATCH 4/5] HADOOP-18117. fix distcp unittest --- .../org/apache/hadoop/tools/TestDistCpSystem.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index 7fe763a89c0b1..eeaf93a61f97a 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -580,15 +580,18 @@ public void testUpdateRootDirectoryAttributes() throws Exception { FileStatus srcStatus = fs.getFileStatus(source); FileStatus destStatus1 = fs.getFileStatus(dest1); - assertNotEquals(destStatus1.getOwner(), srcStatus.getOwner()); - assertNotEquals(destStatus1.getModificationTime(), srcStatus.getModificationTime()); + assertNotEquals(srcStatus.getOwner(), destStatus1.getOwner()); + assertNotEquals(srcStatus.getModificationTime(), + destStatus1.getModificationTime()); // should preserve attrs DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest2.toString(), "-p -update -updateRootDirectoryAttributes", conf); + dest2.toString(), "-p -update -updateRootDirectoryAttributes", + conf); FileStatus destStatus2 = fs.getFileStatus(dest2); - assertEquals(destStatus2.getOwner(), srcStatus.getOwner()); - assertEquals(destStatus2.getModificationTime(), srcStatus.getModificationTime()); + assertEquals(srcStatus.getOwner(), destStatus2.getOwner()); + assertEquals(srcStatus.getModificationTime(), + destStatus2.getModificationTime()); } } \ No newline at end of file From f62d3c39a16920aa88cfa67ee7a56562d24c6b7d Mon Sep 17 00:00:00 2001 From: Author Mohanad Elsafty Date: Mon, 14 Feb 2022 22:04:10 +0800 Subject: [PATCH 5/5] HADOOP-18117. simplify updateRootDirectoryAttributes naming --- .../apache/hadoop/tools/DistCpConstants.java | 4 ++-- .../apache/hadoop/tools/DistCpContext.java | 4 ++-- .../hadoop/tools/DistCpOptionSwitch.java | 7 ++++--- .../apache/hadoop/tools/DistCpOptions.java | 20 +++++++++---------- .../apache/hadoop/tools/OptionsParser.java | 4 ++-- .../hadoop/tools/SimpleCopyListing.java | 3 +-- .../hadoop/tools/mapred/CopyCommitter.java | 13 ++++++------ .../src/site/markdown/DistCp.md.vm | 2 +- .../hadoop/tools/TestDistCpOptions.java | 8 ++++---- .../apache/hadoop/tools/TestDistCpSystem.java | 4 ++-- .../hadoop/tools/TestOptionsParser.java | 8 ++++---- 11 files changed, 38 insertions(+), 39 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java index 95a53c3a2f0b2..289d552b86219 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java @@ -86,8 +86,8 @@ private DistCpConstants() { public static final String CONF_LABEL_SPLIT_RATIO = "distcp.dynamic.split.ratio"; public static final String CONF_LABEL_DIRECT_WRITE = "distcp.direct.write"; - public static final String CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES = - "distcp.update.root.directory.attributes"; + public static final String CONF_LABEL_UPDATE_ROOT = + "distcp.update.root.attributes"; /* Total bytes to be copied. Updated by copylisting. Unfiltered count */ public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected"; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java index 770ff44ba0cdd..8c443f66a0529 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java @@ -175,8 +175,8 @@ public boolean shouldUseIterator() { return options.shouldUseIterator(); } - public boolean shouldUpdateRootDirectoryAttributes() { - return options.shouldUpdateRootDirectoryAttributes(); + public boolean shouldUpdateRoot() { + return options.shouldUpdateRoot(); } public final boolean splitLargeFile() { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index bf579071670ad..becd537f68b41 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -246,9 +246,10 @@ public enum DistCpOptionSwitch { "Use single threaded list status iterator to build " + "the listing to save the memory utilisation at the client")), - UPDATE_ROOT_DIRECTORY_ATTRIBUTES(DistCpConstants.CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES, - new Option("updateRootDirectoryAttributes", false, - "Update root directory attributes (eg permissions, ownership ...)")); + UPDATE_ROOT(DistCpConstants.CONF_LABEL_UPDATE_ROOT, + new Option("updateRoot", false, + "Update root directory attributes " + + "(eg permissions, ownership ...)")); public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct"; private final String confLabel; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index 5c2bc39f2e892..e5685c1492507 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -162,7 +162,7 @@ public final class DistCpOptions { private final boolean useIterator; - private final boolean updateRootDirectoryAttributes; + private final boolean updateRoot; /** * File attributes for preserve. @@ -231,7 +231,7 @@ private DistCpOptions(Builder builder) { this.useIterator = builder.useIterator; - this.updateRootDirectoryAttributes = builder.updateRootDirectoryAttributes; + this.updateRoot = builder.updateRoot; } public Path getSourceFileListing() { @@ -378,8 +378,8 @@ public boolean shouldUseIterator() { return useIterator; } - public boolean shouldUpdateRootDirectoryAttributes() { - return updateRootDirectoryAttributes; + public boolean shouldUpdateRoot() { + return updateRoot; } /** @@ -436,8 +436,8 @@ public void appendToConf(Configuration conf) { DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.USE_ITERATOR, String.valueOf(useIterator)); - DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES, - String.valueOf(updateRootDirectoryAttributes)); + DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.UPDATE_ROOT, + String.valueOf(updateRoot)); } /** @@ -476,7 +476,7 @@ public String toString() { ", verboseLog=" + verboseLog + ", directWrite=" + directWrite + ", useiterator=" + useIterator + - ", updateRootDirectoryAttributes=" + updateRootDirectoryAttributes + + ", updateRoot=" + updateRoot + '}'; } @@ -530,7 +530,7 @@ public static class Builder { private boolean useIterator = false; - private boolean updateRootDirectoryAttributes = false; + private boolean updateRoot = false; public Builder(List sourcePaths, Path targetPath) { Preconditions.checkArgument(sourcePaths != null && !sourcePaths.isEmpty(), @@ -795,8 +795,8 @@ public Builder withUseIterator(boolean useItr) { return this; } - public Builder withUpdateRootDirectoryAttributes(boolean updateRootDirectoryAttrs) { - this.updateRootDirectoryAttributes = updateRootDirectoryAttrs; + public Builder withUpdateRoot(boolean updateRootAttrs) { + this.updateRoot = updateRootAttrs; return this; } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index 2f9cddea316a6..f2875c5f33a4f 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -118,8 +118,8 @@ public static DistCpOptions parse(String[] args) command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch())) .withUseIterator( command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch())) - .withUpdateRootDirectoryAttributes( - command.hasOption(DistCpOptionSwitch.UPDATE_ROOT_DIRECTORY_ATTRIBUTES.getSwitch())); + .withUpdateRoot( + command.hasOption(DistCpOptionSwitch.UPDATE_ROOT.getSwitch())); if (command.hasOption(DistCpOptionSwitch.DIFF.getSwitch())) { String[] snapshots = getVals(command, diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index b494f605f4ec2..c3f097da4ba96 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -616,8 +616,7 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, DistCpContext context) throws IOException { boolean syncOrOverwrite = context.shouldSyncFolder() || context.shouldOverwrite(); - boolean skipRootPath = syncOrOverwrite && - !context.shouldUpdateRootDirectoryAttributes(); + boolean skipRootPath = syncOrOverwrite && !context.shouldUpdateRoot(); for (CopyListingFileStatus fs : fileStatus) { if (fs.getPath().equals(sourcePathRoot) && fs.isDirectory() && skipRootPath) { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index 9fc1f81858487..8c2bc82d3fc7f 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -75,7 +75,7 @@ public class CopyCommitter extends FileOutputCommitter { private boolean ignoreFailures = false; private boolean skipCrc = false; private int blocksPerChunk = 0; - private boolean updateRootDirectoryAttributes = false; + private boolean updateRoot = false; /** * Create a output committer @@ -101,8 +101,8 @@ public void commitJob(JobContext jobContext) throws IOException { Configuration conf = jobContext.getConfiguration(); syncFolder = conf.getBoolean(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, false); overwrite = conf.getBoolean(DistCpConstants.CONF_LABEL_OVERWRITE, false); - updateRootDirectoryAttributes = - conf.getBoolean(CONF_LABEL_UPDATE_ROOT_DIRECTORY_ATTRIBUTES, false); + updateRoot = + conf.getBoolean(CONF_LABEL_UPDATE_ROOT, false); targetPathExists = conf.getBoolean( DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, true); ignoreFailures = conf.getBoolean( @@ -339,11 +339,10 @@ private void preserveFileAttributesForDirectories(Configuration conf) Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath); // - // Skip the root folder when skipRootDirectoryAttributes is true. + // Skip the root folder when skipRoot is true. // - boolean skipRootDirectoryAttributes = - syncOrOverwrite && !updateRootDirectoryAttributes; - if (targetRoot.equals(targetFile) && skipRootDirectoryAttributes) { + boolean skipRoot = syncOrOverwrite && !updateRoot; + if (targetRoot.equals(targetFile) && skipRoot) { continue; } diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm index 4ff44b3d7c644..030f20c23b602 100644 --- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm +++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm @@ -363,7 +363,7 @@ Command Line Options | `-xtrack ` | Save information about missing source files to the specified path. | This option is only valid with `-update` option. This is an experimental property and it cannot be used with `-atomic` option. | | `-direct` | Write directly to destination paths | Useful for avoiding potentially very expensive temporary file rename operations when the destination is an object store | | `-useiterator` | Uses single threaded listStatusIterator to build listing | Useful for saving memory at the client side. Using this option will ignore the numListstatusThreads option | -| `-updateRootDirectoryAttributes` | Update root directory attributes (eg permissions, ownership ...) | Useful if you need to enforce root directory attributes update when using distcp | +| `-updateRoot` | Update root directory attributes (eg permissions, ownership ...) | Useful if you need to enforce root directory attributes update when using distcp | Architecture of DistCp ---------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java index 034dad350aa8c..ac2100ec98ad0 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java @@ -289,7 +289,7 @@ public void testToString() { "atomicWorkPath=null, logPath=null, sourceFileListing=abc, " + "sourcePaths=null, targetPath=xyz, filtersFile='null', " + "blocksPerChunk=0, copyBufferSize=8192, verboseLog=false, " + - "directWrite=false, useiterator=false, updateRootDirectoryAttributes=false}"; + "directWrite=false, useiterator=false, updateRoot=false}"; String optionString = option.toString(); Assert.assertEquals(val, optionString); Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(), @@ -565,13 +565,13 @@ public void testAppendToConf() { } @Test - public void testUpdateRootDirectoryAttributes() { + public void testUpdateRoot() { final DistCpOptions options = new DistCpOptions.Builder( Collections.singletonList( new Path("hdfs://localhost:8020/source")), new Path("hdfs://localhost:8020/target/")) - .withUpdateRootDirectoryAttributes(true) + .withUpdateRoot(true) .build(); - Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes()); + Assert.assertTrue(options.shouldUpdateRoot()); } } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index eeaf93a61f97a..64c6800f9446a 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -556,7 +556,7 @@ public void testSourceRoot() throws Exception { } @Test - public void testUpdateRootDirectoryAttributes() throws Exception { + public void testUpdateRoot() throws Exception { FileSystem fs = cluster.getFileSystem(); Path source = new Path("/src"); @@ -586,7 +586,7 @@ public void testUpdateRootDirectoryAttributes() throws Exception { // should preserve attrs DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(), - dest2.toString(), "-p -update -updateRootDirectoryAttributes", + dest2.toString(), "-p -update -updateRoot", conf); FileStatus destStatus2 = fs.getFileStatus(dest2); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java index 3eb895ab074a0..1ffdd89073dec 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java @@ -806,16 +806,16 @@ public void testExclusionsOption() { } @Test - public void testParseUpdateRootDirectoryAttributes() { + public void testParseUpdateRoot() { DistCpOptions options = OptionsParser.parse(new String[] { "hdfs://localhost:8020/source/first", "hdfs://localhost:8020/target/"}); - Assert.assertFalse(options.shouldUpdateRootDirectoryAttributes()); + Assert.assertFalse(options.shouldUpdateRoot()); options = OptionsParser.parse(new String[] { - "-updateRootDirectoryAttributes", + "-updateRoot", "hdfs://localhost:8020/source/first", "hdfs://localhost:8020/target/"}); - Assert.assertTrue(options.shouldUpdateRootDirectoryAttributes()); + Assert.assertTrue(options.shouldUpdateRoot()); } }