Skip to content

Commit 5a5aa30

Browse files
committed
HADOOP-17742. fix distcp fail when copying to ftp filesystem
1 parent f734455 commit 5a5aa30

File tree

3 files changed

+29
-8
lines changed

3 files changed

+29
-8
lines changed

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,6 @@ private DistCpConstants() {
191191

192192
public static final String CLASS_INSTANTIATION_ERROR_MSG =
193193
"Unable to instantiate ";
194+
195+
public static final String TARGET_TMP_FILE_PREFIX = "distcp.tmp.";
194196
}

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,8 @@
4545

4646
import java.io.FileNotFoundException;
4747
import java.io.IOException;
48-
import java.util.ArrayList;
49-
import java.util.EnumSet;
50-
import java.util.LinkedList;
51-
import java.util.List;
48+
import java.util.*;
49+
import java.util.stream.Collectors;
5250

5351
import static org.apache.hadoop.tools.DistCpConstants.*;
5452

@@ -170,17 +168,38 @@ private void deleteAttemptTempFiles(Path targetWorkPath,
170168
return;
171169
}
172170

173-
FileStatus[] tempFiles = targetFS.globStatus(
174-
new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));
171+
String tmpFilePattern = TARGET_TMP_FILE_PREFIX + jobId.replaceAll("job","attempt") + "*";
172+
List<FileStatus> tempFiles = listTmpFilePaths(targetFS, targetWorkPath, tmpFilePattern);
175173

176-
if (tempFiles != null && tempFiles.length > 0) {
174+
if (tempFiles != null && tempFiles.size() > 0) {
177175
for (FileStatus file : tempFiles) {
178176
LOG.info("Cleaning up " + file.getPath());
179177
targetFS.delete(file.getPath(), false);
180178
}
181179
}
182180
}
183181

182+
private List<FileStatus> listTmpFilePaths(FileSystem fileSystem, Path targetWorkPath, String pattern) throws IOException {
183+
List<Path> tmpPaths = new ArrayList<>();
184+
tmpPaths.add(targetWorkPath);
185+
listChildrenPaths(fileSystem, targetWorkPath, tmpPaths);
186+
List<FileStatus> allTmpFiles = new ArrayList<>(tmpPaths.size());
187+
for (Path path : tmpPaths) {
188+
FileStatus[] tmpFiles = fileSystem.globStatus(new Path(path, pattern));
189+
allTmpFiles.addAll(Arrays.asList(tmpFiles));
190+
}
191+
return allTmpFiles;
192+
}
193+
194+
private void listChildrenPaths(FileSystem fileSystem, Path targetWorkPath, List<Path> paths) throws IOException {
195+
List<Path> directoryPaths = Arrays.stream(fileSystem.listStatus(targetWorkPath))
196+
.filter(status -> status.isDirectory()).map(status -> status.getPath()).collect(Collectors.toList());
197+
paths.addAll(directoryPaths);
198+
for (Path path : directoryPaths) {
199+
listChildrenPaths(fileSystem, path, paths);
200+
}
201+
}
202+
184203
/**
185204
* Cleanup meta folder and other temporary files
186205
*

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ private Path getTempFile(Path target, Mapper.Context context) {
263263

264264
Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent()
265265
: targetWorkPath;
266-
Path tempFile = new Path(root, ".distcp.tmp." +
266+
Path tempFile = new Path(root, DistCpConstants.TARGET_TMP_FILE_PREFIX +
267267
context.getTaskAttemptID().toString() +
268268
"." + String.valueOf(System.currentTimeMillis()));
269269
LOG.info("Creating temp file: {}", tempFile);

0 commit comments

Comments
 (0)