Skip to content

Commit a4f4590

Browse files
authored
HADOOP-18117. Add an option to preserve root directory permissions (#3970)
1 parent de526e1 commit a4f4590

File tree

11 files changed

+115
-7
lines changed

11 files changed

+115
-7
lines changed

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ private DistCpConstants() {
8686
public static final String CONF_LABEL_SPLIT_RATIO =
8787
"distcp.dynamic.split.ratio";
8888
public static final String CONF_LABEL_DIRECT_WRITE = "distcp.direct.write";
89+
public static final String CONF_LABEL_UPDATE_ROOT =
90+
"distcp.update.root.attributes";
8991

9092
/* Total bytes to be copied. Updated by copylisting. Unfiltered count */
9193
public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpContext.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ public boolean shouldUseIterator() {
175175
return options.shouldUseIterator();
176176
}
177177

178+
public boolean shouldUpdateRoot() {
179+
return options.shouldUpdateRoot();
180+
}
181+
178182
public final boolean splitLargeFile() {
179183
return options.getBlocksPerChunk() > 0;
180184
}

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,12 @@ public enum DistCpOptionSwitch {
244244
USE_ITERATOR(DistCpConstants.CONF_LABEL_USE_ITERATOR,
245245
new Option("useiterator", false,
246246
"Use single threaded list status iterator to build "
247-
+ "the listing to save the memory utilisation at the client"));
247+
+ "the listing to save the memory utilisation at the client")),
248248

249+
UPDATE_ROOT(DistCpConstants.CONF_LABEL_UPDATE_ROOT,
250+
new Option("updateRoot", false,
251+
"Update root directory attributes "
252+
+ "(eg permissions, ownership ...)"));
249253

250254
public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
251255
private final String confLabel;

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ public final class DistCpOptions {
162162

163163
private final boolean useIterator;
164164

165+
private final boolean updateRoot;
166+
165167
/**
166168
* File attributes for preserve.
167169
*
@@ -228,6 +230,8 @@ private DistCpOptions(Builder builder) {
228230
this.directWrite = builder.directWrite;
229231

230232
this.useIterator = builder.useIterator;
233+
234+
this.updateRoot = builder.updateRoot;
231235
}
232236

233237
public Path getSourceFileListing() {
@@ -374,6 +378,10 @@ public boolean shouldUseIterator() {
374378
return useIterator;
375379
}
376380

381+
public boolean shouldUpdateRoot() {
382+
return updateRoot;
383+
}
384+
377385
/**
378386
* Add options to configuration. These will be used in the Mapper/committer
379387
*
@@ -427,6 +435,9 @@ public void appendToConf(Configuration conf) {
427435

428436
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.USE_ITERATOR,
429437
String.valueOf(useIterator));
438+
439+
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.UPDATE_ROOT,
440+
String.valueOf(updateRoot));
430441
}
431442

432443
/**
@@ -465,6 +476,7 @@ public String toString() {
465476
", verboseLog=" + verboseLog +
466477
", directWrite=" + directWrite +
467478
", useiterator=" + useIterator +
479+
", updateRoot=" + updateRoot +
468480
'}';
469481
}
470482

@@ -518,6 +530,8 @@ public static class Builder {
518530

519531
private boolean useIterator = false;
520532

533+
private boolean updateRoot = false;
534+
521535
public Builder(List<Path> sourcePaths, Path targetPath) {
522536
Preconditions.checkArgument(sourcePaths != null && !sourcePaths.isEmpty(),
523537
"Source paths should not be null or empty!");
@@ -780,6 +794,11 @@ public Builder withUseIterator(boolean useItr) {
780794
this.useIterator = useItr;
781795
return this;
782796
}
797+
798+
public Builder withUpdateRoot(boolean updateRootAttrs) {
799+
this.updateRoot = updateRootAttrs;
800+
return this;
801+
}
783802
}
784803

785804
}

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ public static DistCpOptions parse(String[] args)
117117
.withDirectWrite(
118118
command.hasOption(DistCpOptionSwitch.DIRECT_WRITE.getSwitch()))
119119
.withUseIterator(
120-
command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch()));
120+
command.hasOption(DistCpOptionSwitch.USE_ITERATOR.getSwitch()))
121+
.withUpdateRoot(
122+
command.hasOption(DistCpOptionSwitch.UPDATE_ROOT.getSwitch()));
121123

122124
if (command.hasOption(DistCpOptionSwitch.DIFF.getSwitch())) {
123125
String[] snapshots = getVals(command,

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -616,10 +616,12 @@ private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
616616
DistCpContext context) throws IOException {
617617
boolean syncOrOverwrite = context.shouldSyncFolder() ||
618618
context.shouldOverwrite();
619+
boolean skipRootPath = syncOrOverwrite && !context.shouldUpdateRoot();
619620
for (CopyListingFileStatus fs : fileStatus) {
620621
if (fs.getPath().equals(sourcePathRoot) &&
621-
fs.isDirectory() && syncOrOverwrite) {
622-
// Skip the root-paths when syncOrOverwrite
622+
fs.isDirectory() && skipRootPath) {
623+
// Skip the root paths when syncing or overwriting, unless an
624+
// update of the root directory attributes was requested.
623625
LOG.debug("Skip {}", fs.getPath());
624626
return;
625627
}

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public class CopyCommitter extends FileOutputCommitter {
7575
private boolean ignoreFailures = false;
7676
private boolean skipCrc = false;
7777
private int blocksPerChunk = 0;
78+
private boolean updateRoot = false;
7879

7980
/**
8081
* Create an output committer
@@ -100,6 +101,8 @@ public void commitJob(JobContext jobContext) throws IOException {
100101
Configuration conf = jobContext.getConfiguration();
101102
syncFolder = conf.getBoolean(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, false);
102103
overwrite = conf.getBoolean(DistCpConstants.CONF_LABEL_OVERWRITE, false);
104+
updateRoot =
105+
conf.getBoolean(CONF_LABEL_UPDATE_ROOT, false);
103106
targetPathExists = conf.getBoolean(
104107
DistCpConstants.CONF_LABEL_TARGET_PATH_EXISTS, true);
105108
ignoreFailures = conf.getBoolean(
@@ -336,9 +339,12 @@ private void preserveFileAttributesForDirectories(Configuration conf)
336339

337340
Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
338341
//
339-
// Skip the root folder when syncOrOverwrite is true.
342+
// Skip the root folder when skipRoot is true.
340343
//
341-
if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;
344+
boolean skipRoot = syncOrOverwrite && !updateRoot;
345+
if (targetRoot.equals(targetFile) && skipRoot) {
346+
continue;
347+
}
342348

343349
FileSystem targetFS = targetFile.getFileSystem(conf);
344350
DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,

hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ Command Line Options
363363
| `-xtrack <path>` | Save information about missing source files to the specified path. | This option is only valid with `-update` option. This is an experimental property and it cannot be used with `-atomic` option. |
364364
| `-direct` | Write directly to destination paths | Useful for avoiding potentially very expensive temporary file rename operations when the destination is an object store |
365365
| `-useiterator` | Uses single threaded listStatusIterator to build listing | Useful for saving memory at the client side. Using this option will ignore the numListstatusThreads option |
366+
| `-updateRoot` | Update root directory attributes (e.g. permissions, ownership, ...) | Useful if you need to force the root directory attributes to be updated when using distcp |
366367

367368
Architecture of DistCp
368369
----------------------

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpOptions.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ public void testToString() {
289289
"atomicWorkPath=null, logPath=null, sourceFileListing=abc, " +
290290
"sourcePaths=null, targetPath=xyz, filtersFile='null', " +
291291
"blocksPerChunk=0, copyBufferSize=8192, verboseLog=false, " +
292-
"directWrite=false, useiterator=false}";
292+
"directWrite=false, useiterator=false, updateRoot=false}";
293293
String optionString = option.toString();
294294
Assert.assertEquals(val, optionString);
295295
Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(),
@@ -563,4 +563,15 @@ public void testAppendToConf() {
563563
"otherwise it may not be fetched properly",
564564
expectedValForEmptyConfigKey, config.get(""));
565565
}
566+
567+
@Test
568+
public void testUpdateRoot() {
569+
final DistCpOptions options = new DistCpOptions.Builder(
570+
Collections.singletonList(
571+
new Path("hdfs://localhost:8020/source")),
572+
new Path("hdfs://localhost:8020/target/"))
573+
.withUpdateRoot(true)
574+
.build();
575+
Assert.assertTrue(options.shouldUpdateRoot());
576+
}
566577
}

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.apache.hadoop.test.GenericTestUtils.getMethodName;
2222
import static org.hamcrest.core.Is.is;
2323
import static org.junit.Assert.assertEquals;
24+
import static org.junit.Assert.assertNotEquals;
2425
import static org.junit.Assert.assertTrue;
2526

2627
import java.io.ByteArrayOutputStream;
@@ -44,6 +45,8 @@
4445
import org.apache.hadoop.hdfs.DistributedFileSystem;
4546
import org.apache.hadoop.hdfs.MiniDFSCluster;
4647
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
48+
import org.apache.hadoop.test.GenericTestUtils;
49+
import org.apache.hadoop.tools.util.DistCpTestUtils;
4750
import org.apache.hadoop.util.ToolRunner;
4851
import org.junit.AfterClass;
4952
import org.junit.Assert;
@@ -551,4 +554,44 @@ public void testSourceRoot() throws Exception {
551554
String[] args2 = new String[]{rootStr, tgtStr2};
552555
Assert.assertThat(ToolRunner.run(conf, new DistCp(), args2), is(0));
553556
}
557+
558+
@Test
559+
public void testUpdateRoot() throws Exception {
560+
FileSystem fs = cluster.getFileSystem();
561+
562+
Path source = new Path("/src");
563+
Path dest1 = new Path("/dest1");
564+
Path dest2 = new Path("/dest2");
565+
566+
fs.delete(source, true);
567+
fs.delete(dest1, true);
568+
fs.delete(dest2, true);
569+
570+
// Create a source dir
571+
fs.mkdirs(source);
572+
fs.setOwner(source, "userA", "groupA");
573+
fs.setTimes(source, new Random().nextLong(), new Random().nextLong());
574+
575+
GenericTestUtils.createFiles(fs, source, 3, 5, 5);
576+
577+
// should not preserve attrs
578+
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
579+
dest1.toString(), "-p -update", conf);
580+
581+
FileStatus srcStatus = fs.getFileStatus(source);
582+
FileStatus destStatus1 = fs.getFileStatus(dest1);
583+
assertNotEquals(srcStatus.getOwner(), destStatus1.getOwner());
584+
assertNotEquals(srcStatus.getModificationTime(),
585+
destStatus1.getModificationTime());
586+
587+
// should preserve attrs
588+
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
589+
dest2.toString(), "-p -update -updateRoot",
590+
conf);
591+
592+
FileStatus destStatus2 = fs.getFileStatus(dest2);
593+
assertEquals(srcStatus.getOwner(), destStatus2.getOwner());
594+
assertEquals(srcStatus.getModificationTime(),
595+
destStatus2.getModificationTime());
596+
}
554597
}

0 commit comments

Comments
 (0)