From d38a2b290a2ad32392860acf5037f9df0aaa3925 Mon Sep 17 00:00:00 2001 From: Andrew Olson Date: Mon, 25 Feb 2019 15:54:34 -0600 Subject: [PATCH 1/2] HADOOP-16147: Allow CopyListing sequence file keys and values to be more easily customized --- .../org/apache/hadoop/tools/CopyListing.java | 19 +++++++++++++++++++ .../hadoop/tools/SimpleCopyListing.java | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java index e018b0b9573b7..4e18bbace6369 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java @@ -248,6 +248,25 @@ protected Credentials getCredentials() { return credentials; } + /** + * Returns the key for an entry in the copy listing sequence file + * @param sourcePathRoot the root source path for determining the relative target path + * @param fileStatus the copy listing file status + * @return the key for the sequence file entry + */ + protected Text getFileListingKey(Path sourcePathRoot, CopyListingFileStatus fileStatus) { + return new Text(DistCpUtils.getRelativePath(sourcePathRoot, fileStatus.getPath())); + } + + /** + * Returns the value for an entry in the copy listing sequence file + * @param fileStatus the copy listing file status + * @return the value for the sequence file entry + */ + protected CopyListingFileStatus getFileListingValue(CopyListingFileStatus fileStatus) { + return fileStatus; + } + /** * Public Factory method with which the appropriate CopyListing implementation may be retrieved. * @param configuration The input configuration. diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index a908e1223ae7c..7e5a26a36abe7 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -718,8 +718,8 @@ private void writeToFileListing(SequenceFile.Writer fileListWriter, return; } - fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot, - fileStatus.getPath())), fileStatus); + fileListWriter.append(getFileListingKey(sourcePathRoot, fileStatus), + getFileListingValue(fileStatus)); fileListWriter.sync(); if (!fileStatus.isDirectory()) { From 81cfd409cae659b4724a715de120e467f132fedd Mon Sep 17 00:00:00 2001 From: Andrew Olson Date: Wed, 27 Feb 2019 10:08:03 -0600 Subject: [PATCH 2/2] HADOOP-16147: Fix checkstyle issues --- .../org/apache/hadoop/tools/CopyListing.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java index 4e18bbace6369..6f8aa34b29584 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java @@ -249,21 +249,25 @@ protected Credentials getCredentials() { } /** - * Returns the key for an entry in the copy listing sequence file - * @param sourcePathRoot the root source path for determining the relative target path + * Returns the key for an entry in the copy listing sequence file. + * @param sourcePathRoot the root source path for determining the relative + * target path * @param fileStatus the copy listing file status * @return the key for the sequence file entry */ - protected Text getFileListingKey(Path sourcePathRoot, CopyListingFileStatus fileStatus) { - return new Text(DistCpUtils.getRelativePath(sourcePathRoot, fileStatus.getPath())); + protected Text getFileListingKey(Path sourcePathRoot, + CopyListingFileStatus fileStatus) { + return new Text(DistCpUtils.getRelativePath(sourcePathRoot, + fileStatus.getPath())); } /** - * Returns the value for an entry in the copy listing sequence file + * Returns the value for an entry in the copy listing sequence file. * @param fileStatus the copy listing file status * @return the value for the sequence file entry */ - protected CopyListingFileStatus getFileListingValue(CopyListingFileStatus fileStatus) { + protected CopyListingFileStatus getFileListingValue( + CopyListingFileStatus fileStatus) { return fileStatus; }