diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java index cc9c284c9fa55..a188b054c6547 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java @@ -643,7 +643,7 @@ boolean apply(Path p) throws IOException { } /** - * Rename files/dirs + * Rename files/dirs. */ @Override @SuppressWarnings("deprecation") @@ -672,6 +672,51 @@ public boolean rename(Path src, Path dst) throws IOException { } } + /** + * Rename files/dirs. + *

+ * If the rename of the inner FS does not fail by raising + * an exception, any checksum file for the source is copied. + * If the source had no checksum, any checksum for the destination + * is deleted, so that there is no inconsistent checksum + * for the now-renamed file. + *

+ * @param source source file/dir + * @param dest destination file/dir/path + * @param options options for the renaming of the source file. + * These are not used for renaming the checksum, which always uses + * {@link Options.Rename#OVERWRITE}.Are + * @throws IOException + */ + @Override + public void rename(final Path source, + final Path dest, + final Options.Rename... options) + throws IOException { + + // invoke rename on the wrapped FS. + // This will raise an exception on any failure. + fs.rename(source, dest, options); + + // if the application gets this far, the rename + // succeeded. Therefore the checksum should + // be renamed too, + Path srcCheckFile = getChecksumFile(source); + Path dstCheckFile = getChecksumFile(dest); + if (fs.exists(srcCheckFile)) { + // try to rename checksum + // the OVERWRITE option is always used; there's no attempt + // to merge in any options supplied for merging the + // main file. + // If new options are added, this decision may need to + // be revisited. + fs.rename(srcCheckFile, dstCheckFile, Options.Rename.OVERWRITE); + } else { + // no src checksum, so remove dest checksum if present + fs.delete(dstCheckFile, true); + } + } + /** * Implement the delete(Path, boolean) in checksum * file system. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java index a8e7b71bb119c..c2c748e493d47 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSExceptionMessages.java @@ -25,18 +25,18 @@ public class FSExceptionMessages { /** - * The operation failed because the stream is closed: {@value} + * The operation failed because the stream is closed: {@value}. */ public static final String STREAM_IS_CLOSED = "Stream is closed!"; /** - * Negative offset seek forbidden : {@value} + * Negative offset seek forbidden : {@value}. */ public static final String NEGATIVE_SEEK = "Cannot seek to a negative offset"; /** - * Seeks : {@value} + * Seeks : {@value}. */ public static final String CANNOT_SEEK_PAST_EOF = "Attempted to seek or read past the end of the file"; @@ -51,4 +51,101 @@ public class FSExceptionMessages { public static final String PERMISSION_DENIED_BY_STICKY_BIT = "Permission denied by sticky bit"; + + /** + * Renaming a destination under source is forbidden. + *

+ * {@value}. + */ + public static final String RENAME_DEST_UNDER_SOURCE = + "Rename destination %s is a directory or file under source %s"; + + /** + * Renaming a destination to source is forbidden. + *

+ * {@value}. + */ + public static final String RENAME_DEST_EQUALS_SOURCE = + "The source %s and destination %s are the same"; + + /** + * Renaming to root is forbidden. + *

+ * {@value}. + */ + public static final String RENAME_DEST_IS_ROOT = + "Rename destination cannot be the root path \"/\""; + + /** + * The parent of a rename destination is not found. + *

+ * {@value}. + */ + public static final String RENAME_DEST_NO_PARENT_OF = + "Rename destination parent of %s not found"; + + /** + * The parent of a rename destination is not found. + * This is a format string, taking the parent path of the destination + *

+ * {@value}. + */ + public static final String RENAME_DEST_NO_PARENT = + "Rename destination parent %s not found"; + + /** + * The parent of a rename destination is not a directory. + *

+ * {@value}. + */ + public static final String RENAME_DEST_PARENT_NOT_DIRECTORY = + "Rename destination parent %s is a file"; + + /** + * The rename destination is not an empty directory. + *

+ * {@value}. + */ + public static final String RENAME_DEST_NOT_EMPTY = + "Rename destination directory is not empty: %s"; + + /** + * The rename destination is not an empty directory. + *

+ * {@value}. + */ + public static final String RENAME_DEST_EXISTS = + "Rename destination %s already exists"; + + /** + * The rename source doesn't exist. + *

+ * {@value}. + */ + public static final String RENAME_SOURCE_NOT_FOUND = + "Rename source %s is not found"; + + /** + * The rename source and dest are of different types. + *

+ * {@value}. + */ + public static final String RENAME_SOURCE_DEST_DIFFERENT_TYPE = + "Rename source %s and destination %s are of different types"; + + /** + * The rename source doesn't exist. + *

+ * {@value}. + */ + public static final String RENAME_SOURCE_IS_ROOT = + "Rename source cannot be the root"; + + /** + * The rename failed for an unknown reason. + *

+ * {@value}. + */ + public static final String RENAME_FAILED = "Rename from %s to %s failed"; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index ab5040486dffc..36fedc03b3a15 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -59,6 +59,7 @@ import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; import org.apache.hadoop.fs.impl.FutureDataInputStreamBuilderImpl; import org.apache.hadoop.fs.impl.OpenFileParameters; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; @@ -1519,94 +1520,58 @@ public boolean setReplication(Path src, short replication) public abstract boolean rename(Path src, Path dst) throws IOException; /** - * Renames Path src to Path dst + * Renames Path source to Path dest. * *

* If OVERWRITE option is not passed as an argument, rename fails - * if the dst already exists. + * if the dest already exists. *

* If OVERWRITE option is passed as an argument, rename overwrites - * the dst if it is a file or an empty directory. Rename fails if dst is + * the dest if it is a file or an empty directory. Rename fails if dest is * a non-empty directory. *

* Note that atomicity of rename is dependent on the file system * implementation. Please refer to the file system documentation for * details. This default implementation is non atomic. - *

- * This method is deprecated since it is a temporary method added to - * support the transition from FileSystem to FileContext for user - * applications. * - * @param src path to be renamed - * @param dst new path after rename - * @throws FileNotFoundException src path does not exist, or the parent - * path of dst does not exist. + * @param source path to be renamed + * @param dest new path after rename + * @param options rename options. + * @throws FileNotFoundException source path does not exist, or the parent + * path of dest does not exist. * @throws FileAlreadyExistsException dest path exists and is a file * @throws ParentNotDirectoryException if the parent path of dest is not * a directory * @throws IOException on failure */ - @Deprecated - protected void rename(final Path src, final Path dst, + public void rename(final Path source, final Path dest, final Rename... options) throws IOException { + final Path sourcePath = makeQualified(source); + final Path destPath = makeQualified(dest); // Default implementation - final FileStatus srcStatus = getFileLinkStatus(src); - if (srcStatus == null) { - throw new FileNotFoundException("rename source " + src + " not found."); - } - - boolean overwrite = false; - if (null != options) { - for (Rename option : options) { - if (option == Rename.OVERWRITE) { - overwrite = true; - } - } - } + new FileSystemRename3Action() + .rename( + new FileSystemRename3Action.RenameValidationBuilder() + .withSourcePath(sourcePath) + .withDestPath(destPath) + .withRenameCallbacks( + createRenameCallbacks()) + .withRenameOptions(options) + .build()); + } - FileStatus dstStatus; - try { - dstStatus = getFileLinkStatus(dst); - } catch (IOException e) { - dstStatus = null; - } - if (dstStatus != null) { - if (srcStatus.isDirectory() != dstStatus.isDirectory()) { - throw new IOException("Source " + src + " Destination " + dst - + " both should be either file or directory"); - } - if (!overwrite) { - throw new FileAlreadyExistsException("rename destination " + dst - + " already exists."); - } - // Delete the destination that is a file or an empty directory - if (dstStatus.isDirectory()) { - FileStatus[] list = listStatus(dst); - if (list != null && list.length != 0) { - throw new IOException( - "rename cannot overwrite non empty destination directory " + dst); - } - } - delete(dst, false); - } else { - final Path parent = dst.getParent(); - final FileStatus parentStatus = getFileStatus(parent); - if (parentStatus == null) { - throw new FileNotFoundException("rename destination parent " + parent - + " not found."); - } - if (!parentStatus.isDirectory()) { - throw new ParentNotDirectoryException("rename destination parent " + parent - + " is a file."); - } - } - if (!rename(src, dst)) { - throw new IOException("rename from " + src + " to " + dst + " failed."); - } + /** + * Override point, create any custom rename callbacks. + * This only needs to be overridden if the subclass has + * optimized operations. + * @return callbacks for renaming. + */ + protected FileSystemRename3Action.RenameCallbacks createRenameCallbacks() { + return FileSystemRename3Action.callbacksFromFileSystem(this); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index 42410974db17c..f229d482f8795 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -30,6 +30,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; @@ -252,7 +253,7 @@ public boolean rename(Path src, Path dst) throws IOException { } @Override - protected void rename(Path src, Path dst, Rename... options) + public void rename(Path src, Path dst, Rename... options) throws IOException { fs.rename(src, dst, options); } @@ -742,4 +743,8 @@ public boolean hasPathCapability(final Path path, final String capability) } } + @Override + protected FileSystemRename3Action.RenameCallbacks createRenameCallbacks() { + return fs.createRenameCallbacks(); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java index 75bc12df8fdcf..d13c702a92425 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java @@ -216,23 +216,61 @@ static CreateOpts[] setOpt(final T newValue, } /** - * Enum to support the varargs for rename() options + * Enum to support the varargs for rename() options. + *

+ * The value of the rename can be used to marshall/unmarshall the option, + * and they MUST NOT be changed. The HDFS edit log + * uses these values -changing them would break everything. + *

+ * If new options are added, consider (a) how HDFS would handle it + * and (b) does {@link ChecksumFileSystem#rename(Path, Path, Rename...)} + * need to pick up any of the options when renaming any CRC files? */ public enum Rename { - NONE((byte) 0), // No options - OVERWRITE((byte) 1), // Overwrite the rename destination + /** + * No options. + */ + NONE((byte) 0), + + /** + * Overwrite any file/empty dir at the rename destination. + */ + OVERWRITE((byte) 1), + + /** + * Move the file to trash. + */ TO_TRASH ((byte) 2); // Rename to trash + /** + * Internal code, used as wire value. + */ private final byte code; - - private Rename(byte code) { + + /** + * Constructor. + * @param code option code. + */ + Rename(byte code) { this.code = code; } + /** + * Create a rename enum from the code, useful + * for unmarshalling. + * If an option code is not recognized, this will return + * null. + * @param code option code. + * @return the matching option or null. + */ public static Rename valueOf(byte code) { return code < 0 || code >= values().length ? null : values()[code]; } + /** + * Code of this option. + * @return the code to marshall. + */ public byte value() { return code; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java index 72eeb99a4ea5d..2ae6c26c24081 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -33,8 +33,11 @@ import java.io.FileDescriptor; import java.net.URI; import java.nio.ByteBuffer; +import java.nio.file.DirectoryIteratorException; +import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.NoSuchFileException; +import java.nio.file.StandardCopyOption; import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.BasicFileAttributeView; import java.nio.file.attribute.FileTime; @@ -46,6 +49,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.nativeio.NativeIO; @@ -1086,4 +1090,72 @@ public boolean hasPathCapability(final Path path, final String capability) return super.hasPathCapability(path, capability); } } + + /** + * Return the local-FS-specific rename callbacks, with + * better failure reporting. + * @return callbacks for renaming. + */ + @Override + protected FileSystemRename3Action.RenameCallbacks createRenameCallbacks() { + return new RenameToRawLocalFS(); + } + + /** + * Subclass of the normal filesystem rename3 callbacks which + * uses the Java 7 nio APIs. + */ + private final class RenameToRawLocalFS extends + FileSystemRename3Action.RenameCallbacksToFileSystem { + + private RenameToRawLocalFS() { + super(RawLocalFileSystem.this); + } + + @Override + public boolean directoryHasChildren(final FileStatus directory) + throws IOException { + try (DirectoryStream paths + = Files.newDirectoryStream( + pathToFile(directory.getPath()).toPath())) { + if (paths.iterator().hasNext()) { + // there's an entry, so fail + return true; + } else { + return false; + } + } catch (DirectoryIteratorException ex) { + throw ex.getCause(); + } + } + + /** + * This rename uses the java.nio.Files API + * to perform the copy and so fail meaningfully. + * {@inheritDoc} + * @param params + */ + @Override + public void executeRename( + final FileSystemRename3Action.ExecuteRenameParams params) + throws PathIOException, IOException { + super.executeRename(params); + File sourceFile = pathToFile(params.getSourcePath()); + File destFile = pathToFile(params.getDestPath()); + + if (params.isDeleteDest()) { + // when the test is to be cleaned up, we invoke the move + // operation with overwrite = true + Files.move( + sourceFile.toPath(), + destFile.toPath(), + StandardCopyOption.REPLACE_EXISTING); + } else { + // otherwise: no overwrite. + Files.move( + sourceFile.toPath(), + destFile.toPath()); + } + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemRename3Action.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemRename3Action.java new file mode 100644 index 0000000000000..a89a994ac75d9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemRename3Action.java @@ -0,0 +1,632 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.AbstractFileSystem; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.ParentNotDirectoryException; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.DurationInfo; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.FSExceptionMessages.*; + +/** + * Rename Support. + *

+ * This is to support different implementations of {@link FileSystem#rename(Path, Path, Options.Rename...)} + * with tuning for their underlying APIs. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class FileSystemRename3Action { + + private static final Logger LOG = + LoggerFactory.getLogger(FileSystemRename3Action.class); + + public static final Path ROOT = new Path("/"); + + public FileSystemRename3Action() { + } + + /** + * Execcute + * {@link FileSystem#rename(Path, Path, Options.Rename...)}, by first + * validating the arguments, then using the supplied + * {@link RenameCallbacks} implementation to probe the store and + * then execute the rename. + *

+ * @param params rename arguments + * @throws FileNotFoundException source path does not exist, or the parent + * path of dest does not exist. + * @throws FileAlreadyExistsException dest path exists and is a file + * @throws ParentNotDirectoryException if the parent path of dest is not + * a directory + * @throws IOException other failure + */ + public void rename(final RenameActionParams params) + throws IOException { + final Path sourcePath = params.getSourcePath(); + final Path destPath = params.getDestPath(); + final Options.Rename[] renameOptions = params.getRenameOptions(); + final RenameCallbacks callbacks = params.getRenameCallbacks(); + + LOG.debug("Rename {} tp {}", + sourcePath, + destPath); + + try { + final String srcStr = sourcePath.toUri().getPath(); + final String dstStr = destPath.toUri().getPath(); + if (dstStr.startsWith(srcStr) + && dstStr.charAt(srcStr.length() - 1) == Path.SEPARATOR_CHAR) { + throw new PathIOException(srcStr, + String.format(RENAME_DEST_UNDER_SOURCE, srcStr, + dstStr)); + } + if (sourcePath.isRoot()) { + throw new PathIOException(srcStr, RENAME_SOURCE_IS_ROOT); + } + if (destPath.isRoot()) { + throw new PathIOException(srcStr, RENAME_DEST_IS_ROOT); + } + if (sourcePath.equals(destPath)) { + throw new FileAlreadyExistsException( + String.format(RENAME_DEST_EQUALS_SOURCE, + srcStr, + dstStr)); + } + + boolean overwrite = false; + boolean deleteDestination = false; + + if (null != renameOptions) { + for (Options.Rename option : renameOptions) { + if (option == Options.Rename.OVERWRITE) { + overwrite = true; + } + } + } + final FileStatus sourceStatus = callbacks.getSourceStatus(sourcePath); + if (sourceStatus == null) { + throw new FileNotFoundException( + String.format(RENAME_SOURCE_NOT_FOUND, sourcePath)); + } + + final FileStatus destStatus = callbacks.getDestStatusOrNull(destPath); + + if (destStatus != null) { + // the destination exists. + + // The source and destination types must match. + if (sourceStatus.isDirectory() != destStatus.isDirectory()) { + throw new PathIOException(sourcePath.toString(), + String.format(RENAME_SOURCE_DEST_DIFFERENT_TYPE, + sourcePath, + destPath)); + } + // and the rename must permit overwrite + if (!overwrite) { + // the destination exists but overwrite = false + throw new FileAlreadyExistsException( + String.format(RENAME_DEST_EXISTS, destPath)); + } + // If the destination exists,is a directory, it must be empty + if (destStatus.isDirectory()) { + if (callbacks.directoryHasChildren(destStatus)) { + throw new PathIOException(sourcePath.toString(), + String.format(RENAME_DEST_NOT_EMPTY, + destPath)); + } + } + // declare that the destination must be deleted + deleteDestination = true; + } else { + // there was no destination status; the path does not + // exist. + + // verify the parent of the dest being a directory. + // this is implicit if the source and dest share the same parent, + // otherwise a probe is is needed. + // uses isDirectory so those stores which have optimised that + // are slightly more efficient on the success path. + // This is at the expense of a second check during failure + // to distinguish parent dir not existing from parent + // not being a file. + final Path destParent = destPath.getParent(); + final Path srcParent = sourcePath.getParent(); + if (!destParent.equals(srcParent) && destParent.isRoot()) { + // check that any non-root parent is a directory + callbacks.verifyIsDirectory(destParent); + } + } + + // and now, finally, the rename. + // log duration @ debug for the benefit of anyone wondering why + // renames are slow against object stores. + try (DurationInfo ignored = new DurationInfo(LOG, false, + "executing rename(%s %s, %s)", + sourceStatus.isFile() ? "file" : "directory", + srcStr, + destPath)) { + callbacks.executeRename( + new ExecuteRenameParams(sourceStatus, destPath, destStatus, + renameOptions, deleteDestination)); + } + } finally { + IOUtils.cleanupWithLogger(LOG, callbacks); + + } + } + + /** + * Create the rename callbacks from a filesystem. + * @param fileSystem filesystem to invoke. + * @return callbacks. + */ + public static RenameCallbacks callbacksFromFileSystem( + FileSystem fileSystem) { + return new RenameCallbacksToFileSystem(fileSystem); + } + + /** + * Create the rename callbacks into an AbstractFileSystem + * instance. + * @param abstractFileSystem filesystm to invoke. + * @return callbacks. + */ + public static RenameCallbacks callbacksFromFileContext( + AbstractFileSystem abstractFileSystem) { + return new RenameCallbacksToFileContext(abstractFileSystem); + } + + + /** + * Arguments for the rename validation operation. + */ + public static class RenameActionParams { + + private final Path sourcePath; + + private final Path destPath; + + private final RenameCallbacks renameCallbacks; + + private final Options.Rename[] renameOptions; + + /** + * @param sourcePath qualified source + * @param destPath qualified dest. + * @param renameCallbacks callbacks + * @param renameOptions options + */ + private RenameActionParams(final Path sourcePath, + final Path destPath, + final RenameCallbacks renameCallbacks, + final Options.Rename[] renameOptions) { + this.sourcePath = sourcePath; + this.destPath = destPath; + this.renameCallbacks = renameCallbacks; + this.renameOptions = renameOptions; + } + + Path getSourcePath() { + return sourcePath; + } + + Path getDestPath() { + return destPath; + } + + public RenameCallbacks getRenameCallbacks() { + return renameCallbacks; + } + + Options.Rename[] getRenameOptions() { + return renameOptions; + } + } + + /** + * Builder for the {@link RenameActionParams} arguments. + */ + public static class RenameValidationBuilder { + + private Path sourcePath; + + + private Path destPath; + + private RenameCallbacks renameCallbacks; + + private Options.Rename[] renameOptions; + + public RenameValidationBuilder withSourcePath(final Path sourcePath) { + this.sourcePath = sourcePath; + return this; + } + + + public RenameValidationBuilder withDestPath(final Path destPath) { + this.destPath = destPath; + return this; + } + + public RenameValidationBuilder withRenameCallbacks( + final RenameCallbacks renameCallbacks) { + this.renameCallbacks = renameCallbacks; + return this; + } + + + public RenameValidationBuilder withRenameOptions( + final Options.Rename... renameOptions) { + this.renameOptions = renameOptions; + return this; + } + + public RenameActionParams build() { + return new RenameActionParams(sourcePath, + destPath, + renameCallbacks, + renameOptions); + } + } + + /** + * Result of the rename. + * Extensible in case there's a desire to add data in future + * (statistics, results of FileStatus calls, etc) + */ + public static class RenameValidationResult { + + } + + /** + * Callbacks which must be implemented to support rename. + *

+ * These are used to validate/prepare the operation and the + * final action. + */ + public interface RenameCallbacks extends Closeable { + + /** + * Test for a directory having children. + *

+ * This is invoked knowing that the destination exists + * and is a directory. + * @param directory the file status of the destination. + * @return true if the path has one or more child entries. + * @throws IOException for IO problems. + */ + boolean directoryHasChildren(FileStatus directory) + throws IOException; + + /** + * Verify that the parent path of a rename is a directory + * @param destParent parent path + * @throws ParentNotDirectoryException it isn't + * @throws IOException any other IO Failure + */ + void verifyIsDirectory(Path destParent) + throws ParentNotDirectoryException, IOException; + + /** + * Get the status of the source. + * @param sourcePath source + * @return status of source + * @throws FileNotFoundException no source found + * @throws IOException any other IO failure + */ + FileStatus getSourceStatus(Path sourcePath) + throws FileNotFoundException, IOException; + + /** + * Get the destination status. + * Any failure in the probe is considered to mean "no destination" + * @param destPath destination path. + * @return the file status, or null for a failure. + */ + FileStatus getDestStatusOrNull(Path destPath) throws IOException; + + /** + * Execute the final rename, by invoking + * {@link FileSystem#rename(Path, Path)}. + * If the method returns "false", a + * PathIOException is raised. + *

+ * If a FileSystem can throw more meaningful + * exceptions here, users will appreciate it. + * + * @param params @throws IOException IO failure + * @throws PathIOException IO failure + * + */ + void executeRename(final ExecuteRenameParams params) + throws PathIOException, IOException; + } + + /** + * Implementation of {@link RenameCallbacks} which uses the + * FileSystem APIs. + *

+ * If FS implementations can do this more efficiently + * or fail rename better than {@link FileSystem#rename(Path, Path)} + * does, they should subclass/reimplement this. + */ + public static class RenameCallbacksToFileSystem + implements RenameCallbacks { + + /** + * FS to invoke. + */ + private final FileSystem fileSystem; + + /** + * Construct with the given filesystem. + * @param fileSystem target FS. + */ + protected RenameCallbacksToFileSystem(final FileSystem fileSystem) { + this.fileSystem = requireNonNull(fileSystem); + } + + /** + * Get the filesystem. + * @return + */ + protected FileSystem getFileSystem() { + return fileSystem; + } + + @Override + public void close() throws IOException { + + } + + @Override + public boolean directoryHasChildren(final FileStatus directory) + throws IOException { + // get an iterator and then see if it is of size + // one or more. + // we use this for more efficiency with larger directories + // on those clients which do paged downloads. + return fileSystem.listStatusIterator(directory.getPath()) + .hasNext(); + } + + @Override + public void verifyIsDirectory(final Path destParent) + throws ParentNotDirectoryException, IOException { + if (!fileSystem.getFileStatus(destParent).isDirectory()) { + throw new ParentNotDirectoryException( + String.format(RENAME_DEST_PARENT_NOT_DIRECTORY, destParent)); + } + } + + @Override + public FileStatus getSourceStatus(Path sourcePath) + throws FileNotFoundException, IOException { + return fileSystem.getFileLinkStatus(sourcePath); + } + + @Override + public FileStatus getDestStatusOrNull(Path destPath) throws IOException { + try { + return fileSystem.getFileLinkStatus(destPath); + } catch (FileNotFoundException e) { + return null; + } + } + + /** + * Execute the final rename, by invoking + * {@link FileSystem#rename(Path, Path)}. + * If the method returns "false", a + * PathIOException is raised. + *

+ * If a FileSystem can throw more meaningful + * exceptions here, users will appreciate it. + * {@inheritDoc}. + * @param params + */ + @Override + public void executeRename(final ExecuteRenameParams params) + throws PathIOException, IOException { + + Path sourcePath = params.getSourcePath(); + Path destPath = params.getDestPath(); + if (params.isDeleteDest()) { + fileSystem.delete(destPath, false); + } + if (!fileSystem.rename(sourcePath, destPath)) { + // inner rename failed, no obvious cause + throw new PathIOException(sourcePath.toString(), + String.format(RENAME_FAILED, sourcePath, + destPath)); + } + } + + } + + /** + * Implementation of {@link RenameCallbacks} which uses the + * FileSystem APIs. This may be suboptimal, if FS implementations + * can implement empty directory probes/deletes better themselves. + */ + private static final class RenameCallbacksToFileContext + implements RenameCallbacks { + + /** + * FS to invoke. + */ + private final AbstractFileSystem fileSystem; + + + /** + * Construct with the given filesystem. + * @param fileSystem target FS. + */ + private RenameCallbacksToFileContext( + final AbstractFileSystem fileSystem) { + this.fileSystem = requireNonNull(fileSystem); + } + + @Override + public void close() throws IOException { + + } + + @Override + public boolean directoryHasChildren(final FileStatus directory) + throws IOException { + Path path = directory.getPath(); + // get an iterator and then see if it is of size + // one or more. + // we use this for more efficiency with larger directories + // on those clients which do paged downloads. + return fileSystem.listStatusIterator(path).hasNext(); + } + + @Override + public void verifyIsDirectory(final Path destParent) + throws ParentNotDirectoryException, IOException { + // check + if (!fileSystem.getFileStatus(destParent).isDirectory()) { + throw new ParentNotDirectoryException( + String.format(RENAME_DEST_PARENT_NOT_DIRECTORY, destParent)); + } + } + + @Override + public FileStatus getSourceStatus(Path sourcePath) + throws FileNotFoundException, IOException { + return fileSystem.getFileLinkStatus(sourcePath); + } + + @Override + public FileStatus getDestStatusOrNull(Path destPath) throws IOException { + try { + return fileSystem.getFileLinkStatus(destPath); + } catch (FileNotFoundException e) { + return null; + } + } + + /** + * Execute the final rename, by invoking + * {@link FileSystem#rename(Path, Path)}. + * If the method returns "false", a + * PathIOException is raised. + *

+ * If a FileSystem can throw more meaningful + * exceptions here, users will appreciate it. + * {@inheritDoc}. + * @param params + */ + @Override + public void executeRename(final ExecuteRenameParams params) + throws PathIOException, IOException { + Path sourcePath = params.getSourcePath(); + Path destPath = params.getDestPath(); + if (params.isDeleteDest()) { + fileSystem.delete(destPath, false); + } + fileSystem.renameInternal(sourcePath, destPath); + } + + } + + /** + * Parameters for {@link RenameCallbacks#executeRename(ExecuteRenameParams)}. + *

+ * Made a parameter object so if we extend it, external implementations + * of the {@link RenameCallbacks} interface won't encounter link + * problems. + */ + public static class ExecuteRenameParams { + + private final FileStatus sourceStatus; + + private final Path destPath; + + @Nullable private final FileStatus destStatus; + + private final Options.Rename[] renameOptions; + + private final boolean deleteDest; + + /** + * @param sourceStatus source FS status + * @param destPath path of destination + * @param destStatus status of destination + * @param renameOptions any rename options + * @param deleteDest delete destination path + */ + public ExecuteRenameParams(final FileStatus sourceStatus, + final Path destPath, + @Nullable final FileStatus destStatus, + final Options.Rename[] renameOptions, + final boolean deleteDest) { + this.sourceStatus = sourceStatus; + this.destPath = destPath; + this.destStatus = destStatus; + this.renameOptions = renameOptions; + this.deleteDest = deleteDest; + } + + public FileStatus getSourceStatus() { + return sourceStatus; + } + + public Path getSourcePath() { + return sourceStatus.getPath(); + } + + public FileStatus getDestStatus() { + return destStatus; + } + + public Path getDestPath() { + return destPath; + } + + public Options.Rename[] getRenameOptions() { + return renameOptions; + } + + public boolean isDeleteDest() { + return deleteDest; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 665e328447d5b..62d4a039c8208 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -88,7 +88,7 @@ Get the status of a path stat.length = len(FS.Files[p]) stat.isdir = False stat.blockSize > 0 - elif isDir(FS, p) : + elif isDirectory(FS, p) : stat.length = 0 stat.isdir = True elif isSymlink(FS, p) : @@ -177,7 +177,7 @@ Path `path` must exist: elif isFile(FS, path) and not filter.accept(P) : result = [] - elif isDir(FS, path): + elif isDirectory(FS, path): result = [ getFileStatus(c) for c in children(FS, path) if filter.accepts(c) ] @@ -346,7 +346,7 @@ The operation generates a set of results, `resultset`, equal to the result of elif isFile(FS, path) and not filter.accept(path) : resultset = [] - elif isDir(FS, path) : + elif isDirectory(FS, path) : resultset = [ getLocatedFileStatus(FS, c) for c in children(FS, path) where filter.accept(c) @@ -433,7 +433,7 @@ Where def blocks(FS, p, s, s + l) = a list of the blocks containing data(FS, path)[s:s+l] -Note that that as `length(FS, f) ` is defined as `0` if `isDir(FS, f)`, the result +Note that that as `length(FS, f) ` is defined as `0` if `isDirectory(FS, f)`, the result of `getFileBlockLocations()` on a directory is `[]` @@ -546,7 +546,7 @@ Create a directory and all its parents. The path must either be a directory or not exist - if exists(FS, p) and not isDir(FS, p) : + if exists(FS, p) and not isDirectory(FS, p) : raise [ParentNotDirectoryException, FileAlreadyExistsException, IOException] No ancestor may be a file @@ -591,7 +591,7 @@ The file must not exist for a no-overwrite create: Writing to or overwriting a directory must fail. - if isDir(FS, p) : raise {FileAlreadyExistsException, FileNotFoundException, IOException} + if isDirectory(FS, p) : raise {FileAlreadyExistsException, FileNotFoundException, IOException} No ancestor may be a file @@ -962,7 +962,7 @@ directories, which will always remain (see below for special coverage of root di A directory with children and `recursive == False` cannot be deleted - if isDir(FS, p) and not recursive and (children(FS, p) != {}) : raise IOException + if isDirectory(FS, p) and not recursive and (children(FS, p) != {}) : raise IOException (HDFS raises `PathIsNotEmptyDirectoryException` here.) @@ -996,8 +996,6 @@ Deleting an empty root does not change the filesystem state and may return true or false. if isRoot(p) and children(FS, p) == {} : - FS ' = FS - result = (undetermined) There is no consistent return code from an attempt to delete the root directory. @@ -1012,7 +1010,7 @@ return value from overreacting. Deleting an empty directory that is not root will remove the path from the FS and return true. - if isDir(FS, p) and not isRoot(p) and children(FS, p) == {} : + if isDirectory(FS, p) and not isRoot(p) and children(FS, p) == {} : FS' = (FS.Directories - [p], FS.Files, FS.Symlinks) result = True @@ -1026,7 +1024,7 @@ can generally have three outcomes: the correct permissions to delete everything, they are free to do so (resulting in an empty filesystem). - if isDir(FS, p) and isRoot(p) and recursive : + if isDirectory(FS, p) and isRoot(p) and recursive : FS' = ({["/"]}, {}, {}, {}) result = True @@ -1034,7 +1032,7 @@ they are free to do so (resulting in an empty filesystem). filesystem must be taken offline and reformatted if an empty filesystem is desired. - if isDir(FS, p) and isRoot(p) and recursive : + if isDirectory(FS, p) and isRoot(p) and recursive : FS' = FS result = False @@ -1071,11 +1069,11 @@ adverse consequences of the simpler permissions models of stores. Deleting a non-root path with children `recursive==true` removes the path and all descendants - if isDir(FS, p) and not isRoot(p) and recursive : + if isDirectory(FS, p) and not isRoot(p) and recursive : FS' where: - not isDir(FS', p) + not isDirectory(FS', p) and forall d in descendants(FS, p): - not isDir(FS', d) + not isDirectory(FS', d) not isFile(FS', d) not isSymlink(FS', d) result = True @@ -1093,174 +1091,13 @@ removes the path and all descendants * Object Stores and other non-traditional filesystems onto which a directory tree is emulated, tend to implement `delete()` as recursive listing and entry-by-entry delete operation. -This can break the expectations of client applications for O(1) atomic directory +This can break the expectations of client applications for `O(1)` atomic directory deletion, preventing the stores' use as drop-in replacements for HDFS. -### `boolean rename(Path src, Path d)` - -In terms of its specification, `rename()` is one of the most complex operations within a filesystem . - -In terms of its implementation, it is the one with the most ambiguity regarding when to return false -versus raising an exception. - -Rename includes the calculation of the destination path. -If the destination exists and is a directory, the final destination -of the rename becomes the destination + the filename of the source path. - - let dest = if (isDir(FS, src) and d != src) : - d + [filename(src)] - else : - d - -#### Preconditions - -All checks on the destination path MUST take place after the final `dest` path -has been calculated. - -Source `src` must exist: - - exists(FS, src) else raise FileNotFoundException - - -`dest` cannot be a descendant of `src`: - - if isDescendant(FS, src, dest) : raise IOException - -This implicitly covers the special case of `isRoot(FS, src)`. - -`dest` must be root, or have a parent that exists: - - isRoot(FS, dest) or exists(FS, parent(dest)) else raise IOException - -The parent path of a destination must not be a file: - - if isFile(FS, parent(dest)) : raise IOException - -This implicitly covers all the ancestors of the parent. - -There must not be an existing file at the end of the destination path: - - if isFile(FS, dest) : raise FileAlreadyExistsException, IOException - - -#### Postconditions - - -##### Renaming a directory onto itself - -Renaming a directory onto itself is no-op; return value is not specified. - -In POSIX the result is `False`; in HDFS the result is `True`. - - if isDir(FS, src) and src == dest : - FS' = FS - result = (undefined) - - -##### Renaming a file to self - -Renaming a file to itself is a no-op; the result is `True`. - - if isFile(FS, src) and src == dest : - FS' = FS - result = True - - -##### Renaming a file onto a nonexistent path - -Renaming a file where the destination is a directory moves the file as a child - of the destination directory, retaining the filename element of the source path. - - if isFile(FS, src) and src != dest: - FS' where: - not exists(FS', src) - and exists(FS', dest) - and data(FS', dest) == data (FS, dest) - result = True - - - -##### Renaming a directory onto a directory - -If `src` is a directory then all its children will then exist under `dest`, while the path -`src` and its descendants will no longer exist. The names of the paths under -`dest` will match those under `src`, as will the contents: - - if isDir(FS, src) isDir(FS, dest) and src != dest : - FS' where: - not exists(FS', src) - and dest in FS'.Directories] - and forall c in descendants(FS, src) : - not exists(FS', c)) - and forall c in descendants(FS, src) where isDir(FS, c): - isDir(FS', dest + childElements(src, c) - and forall c in descendants(FS, src) where not isDir(FS, c): - data(FS', dest + childElements(s, c)) == data(FS, c) - result = True - -##### Renaming into a path where the parent path does not exist - - not exists(FS, parent(dest)) - -There is no consistent behavior here. - -*HDFS* - -The outcome is no change to FileSystem state, with a return value of false. - - FS' = FS; result = False - -*Local Filesystem* - -The outcome is as a normal rename, with the additional (implicit) feature -that the parent directories of the destination also exist. - - exists(FS', parent(dest)) - -*Other Filesystems (including Swift) * - -Other filesystems strictly reject the operation, raising a `FileNotFoundException` - -##### Concurrency requirements - -* The core operation of `rename()`—moving one entry in the filesystem to -another—MUST be atomic. Some applications rely on this as a way to coordinate access to data. - -* Some FileSystem implementations perform checks on the destination -FileSystem before and after the rename. One example of this is `ChecksumFileSystem`, which -provides checksummed access to local data. The entire sequence MAY NOT be atomic. - -##### Implementation Notes - -**Files open for reading, writing or appending** - -The behavior of `rename()` on an open file is unspecified: whether it is -allowed, what happens to later attempts to read from or write to the open stream - -**Renaming a directory onto itself** - -The return code of renaming a directory onto itself is unspecified. - -**Destination exists and is a file** - -Renaming a file atop an existing file is specified as failing, raising an exception. - -* Local FileSystem : the rename succeeds; the destination file is replaced by the source file. - -* HDFS : The rename fails, no exception is raised. Instead the method call simply returns false. - -**Missing source file** - -If the source file `src` does not exist, `FileNotFoundException` should be raised. - -HDFS fails without raising an exception; `rename()` merely returns false. +### `boolean rename(Path source, Path dest)` - FS' = FS - result = false +See [Renaming File and Directories](renaming.html). -The behavior of HDFS here should not be considered a feature to replicate. -`FileContext` explicitly changed the behavior to raise an exception, and the retrofitting of that action -to the `DFSFileSystem` implementation is an ongoing matter for debate. ### `void concat(Path p, Path sources[])` @@ -1319,7 +1156,7 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep if not exists(FS, p) : raise FileNotFoundException - if isDir(FS, p) : raise [FileNotFoundException, IOException] + if isDirectory(FS, p) : raise [FileNotFoundException, IOException] if newLength < 0 || newLength > len(FS.Files[p]) : raise HadoopIllegalArgumentException diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md index df538ee6cf96b..c4cbdec7f1372 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md @@ -35,6 +35,7 @@ HDFS as these are commonly expected by Hadoop client applications. 1. [FSDataInputStream class](fsdatainputstream.html) 1. [PathCapabilities interface](pathcapabilities.html) 1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html) +1. [Renaming File and Directories](renaming.html) 2. [Testing with the Filesystem specification](testing.html) 2. [Extending the specification and its tests](extending.html) 1. [Uploading a file using Multiple Parts](multipartuploader.html) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/rename.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/rename.md new file mode 100644 index 0000000000000..24d10b0c1f1b1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/rename.md @@ -0,0 +1,458 @@ + + +# Renaming File and Directories + +The `rename()` operation is the one Which causes the most problems for anyone implementing +a filesystem or Hadoop FS client + + +It has a number of key problems + +* It is a critical part of algorithms used to commit work in MapReduce, Spark, Hive, etc. +* It is an operation which spans multiple directories yet is expected to be a atomic, so all + stores have to implement some lock mechanism at a larger granularity than just a + single directory. +* Any file system with mount points and multiple volumes is going to fail when any attempt is made to rename eight file or directory to a different mount point/volume etc. There is no way to probe for rename() being permitted without actually trying. +* Encryption zones can have similar issues. +* In object stores where the notion of "directory" does not exist, a lot of effort has to go into making rename pretend to work, yet the atomicity requirements of commit algorithms are unlikely to be satisfied. +* Nobody really understand all the nuances of the semantics of rename +* The original `FileSystem.rename()` has behaviors which do not match that of POSIX and which are unwelcome. +* When the local filesystem's rename operations are invoked they are (a) platform dependent and (b) accessed via Java's rename wrapper, which itself has similar issues. Indeed, the `java.io.FilesSystem.rename()` call +is clearly something that Hadoop's rename/2 call is based on. + +## Notation; rename/2 and rename/3. + +This document uses the term + +* *rename/2* : the `FileSystm.rename(source, dest)` call with two arguments ("arity of two"). +* *rename/3* : the `rename(source, dest, options)` call with three arguments ("arity of three") which +is implemented in both `FileContext` and `FileSystem` *and is implemented consistently in both places. + +Of the two, rename/3 is better because it eliminates the "helpful" destination path recalculation, +resolves ambiguity about what renaming a directory into an empty directory does, *and* fails +meaningfully. + +### `boolean rename(Path source, Path dest, Rename...options))` , "rename/3" + +### `void rename(Path source, Path dest, Rename...options)` + + + +This is a better implementation of path rename than its predecessor +`rename(src, dest)` (rename/2). + + +1. Does not redefine the destination path based on whether or not the supplied +destination is a directory and the source a file. +1. Defines the policy on overwriting paths. +1. Defines the policy on nonexistent source paths. +1. MUST raise an exception on all failure conditions. +1. Has no return code. If the method does not raise an exception, it has +succeeded. + +The semantics of this method are simpler and more clearly defined. + +- The final path of the rename is always that of the `dest` argument; there +is no attempt to generate a new path underneath a target directory. +- If that destination exists, unless the `Rename.OVERWRITE` is set, an exception +is raised. +- The parent directory MUST exist. + +This avoids the "quirks" of `rename(source, dest)` where the behaviour +of the rename depends on the type and state of the destination. + + +The `Rename` enumeration has three values: + +* `NONE`: This is a no-op entry. +* `OVERWRITE`: overwrite any destination file or empty destination directory. +* `TRASH`: flag to indicate that the destination is the trash portion of + the filesystem. This flag is used in HDFS to verify that the caller has + the appropriate delete permission as well as the rename permission. + +As multiple entries can be supplied, all probes for features are done by +checking for the specific flag in the options list. There is no requirement of +ordering, and it is not an error if there are duplicate entries. + +```python +let overwrite = Rename.OVERWRITE in options +``` + +#### Preconditions + +Source `source` MUST exist: + +```python +exists(FS, source) else raise FileNotFoundException +``` + + +`source` MUST NOT be root: + +```python +if isRoot(FS, source)): raise IOException +``` + +`dest` MUST NOT equal `source` + +```python +source != dest else raise FileAlreadyExistsException +``` + +`dest` MUST NOT be a descendant of `source`: + +```python +if isDescendant(FS, source, dest) : raise IOException +``` + +`dest` MUST NOT be root: + +```python +if isRoot(FS, dest)): raise IOException +``` + + +The parent directory of `dest` MUST exist: + +```python +exists(FS, parent(dest)) else raise FileNotFoundException +``` + + +The parent path of a destination MUST be a directory: + +```python +isDirectory(FS, parent(dest)) else raise ParentNotDirectoryException, IOException +``` + +This implicitly covers all the ancestors of the parent. + +If `dest` exists the type of the entry MUST match +that of the source. + +```python +if exists(FS, dest) and isFile(FS, dest) and not isFile(FS, src): raise IOException + +if exists(FS, dest) and isDirectory(FS, dest) and not isDirectory(FS, src): raise IOException +``` + + +There must be not be an existing file at the end of the destination path unless +`Rename.OVERWRITE` was set. + +```python +if isFile(FS, dest) and not overwrite: raise FileAlreadyExistsException +``` + + +If the source is a directory, there must be not be an existing directory at the end of +the destination path unless `Rename.OVERWRITE` was set. + +```python +if isDirectory(FS, dest) and not overwrite and isDirectory(FS, source): raise FileAlreadyExistsException +``` + + +If there is a directory at the end of the path, it must be empty, irrespective of +the overwrite flag: + +```python +if isDirectory(FS, source) and isDirectory(FS, dest) and len(listStatus(FS, dest)) > 0: + raise IOException +``` + +Note how this is different from `rename(src, dest)`. + + +#### Postconditions + +##### Renaming a file onto an empty path or an existing file + +Renaming a file to a path where there is no entry moves the file to the +destination path. If the destination exists, the operation MUST fail unless the +`overwrite` option is not set. If `overwrite` is set the semantics are the same +as renaming onto an empty path. + +```python +if isFile(FS, source) and not exists(dest) or (isFile(dest) and overwrite): + FS' where: + exists(FS', dest) + and data(FS', dest) == data(FS, source) + and not exists(FS', source) +``` + +##### Renaming a directory + +If `source` is a directory then all its children will then exist under `dest` in `FS'`, while the path +`source` and its descendants will no longer exist. The names of the paths under +`dest` will match those under `source`, as will the contents: + +```python +if isDirectory(FS, source) : + FS' where: + isDirectory(FS, dest) + and forall c in descendants(FS, source) where isDirectory(FS, c): + isDirectory(FS', dest + childElements(source, c)) + and forall c in descendants(FS, source) where not isDirectory(FS, c): + data(FS', dest + childElements(s, c)) == data(FS, c) + and not exists(FS', source) + and forall c in descendants(FS, source): + not exists(FS', c)) +``` + +##### Concurrency requirements + +* The core operation of rename/3, moving one entry in the filesystem to +another, SHOULD be atomic. Many applications rely on this as a way to commit operations. + +* However, the base implementation is *not* atomic; some of the precondition checks +are performed separately. + +HDFS's rename/3 operation *is* atomic; other filesystems SHOULD follow its example. + +##### Implementation Notes + +**Files open for reading, writing or appending** + +The behavior of rename/3 on an open file is unspecified: whether it is +allowed, what happens to later attempts to read from or write to the open stream + + +### `boolean rename(Path source, Path dest)` , "rename/2" + +In terms of its specification, `rename(Path source, Path dest)` is one of the most +complex APIs operations within a filesystem. + +In terms of its implementation, it is the one with the most ambiguity regarding when to return false +versus raising an exception. This makes less useful to use than +others: in production code it is usually wrapped by code to raise an +exception when `false` is returned. + +```java +if (!fs.rename(source, dest)) { + throw new IOException("rename failed!!!"); +} +``` + +The weakness of this approach is not just that all uses need to be so +guarded, the actual cause of the failure is lost. + +Rename/2 includes the calculation of the destination path. +If the source is a file, and the destination is an (existing) +directory, the final destination +of the rename becomes the destination + the filename of the source path. + +```python +let dest' = if (isFile(FS, source) and isDirectory(FS, dest)) : + path(dest, filename(source)) + else : + dest +``` + +This is a needless complication whose origin is believed to have been +based on experience with the unix `mv` command, rather than the posix API. + + + +#### Preconditions + +All checks on the destination path MUST take place after the final `dest` path +has been calculated. + +Source `source` must exist: + +```python +exists(FS, source) else raise FileNotFoundException +``` + +`dest` cannot be a descendant of `source`: + +```python +if isDescendant(FS, source, dest) : raise IOException +``` + +This implicitly covers the special case of `isRoot(FS, source)`. + +`dest` must be root, or have a parent that exists: + +```python +isRoot(FS, dest) or exists(FS, parent(dest)) else raise IOException +``` + +The parent path of a destination must not be a file: + +```python +if isFile(FS, parent(dest)) : raise IOException +``` + +This implicitly covers all the ancestors of the parent. + +There must not be an existing file at the end of the destination path: + +```python +if isFile(FS, dest) : raise FileAlreadyExistsException, IOException +``` + + +#### Postconditions + + +##### Renaming a directory onto itself + +Renaming a directory onto itself is no-op; return value is not specified. + +In POSIX the result is `False`; in HDFS the result is `True`. + +```python +if isDirectory(FS, source) and source == dest : + FS' = FS + result = (undefined) +``` + + +##### Renaming a file to self + +Renaming a file to itself is a no-op; the result is `True`. + +```python +if isFile(FS, source) and source == dest : + FS' = FS + result = True +``` + + + +##### Renaming a file onto a nonexistent path + +Renaming a file where the destination is a directory moves the file as a child + of the destination directory, retaining the filename element of the source path. + +```python +if isFile(FS, source) and source != dest: + FS' where: + not exists(FS', source) + and exists(FS', dest) + and data(FS', dest) == data (FS, dest) + result = True +``` + + + +##### Renaming a directory under a directory + +If `source` is a directory then all its children will then exist under `dest`, while the path +`source` and its descendants will no longer exist. The names of the paths under +`dest` will match those under `source`, as will the contents: + +```python +if isDirectory(FS, source) and source != dest : + FS' where: + not exists(FS', source) + and dest in FS'.Directories + and forall c in descendants(FS, source) : + not exists(FS', c)) + and forall c in descendants(FS, source) where isDirectory(FS, c): + isDirectory(FS', dest + childElements(source, c)) + and forall c in descendants(FS, source) where not isDirectory(FS, c): + data(FS', dest + childElements(s, c)) == data(FS, c) +``` + + + +##### Renaming into a path where the parent path does not exist + +```python +not exists(FS, parent(dest)) +``` + +There is no consistent behavior here. + +*HDFS* + +The outcome is no change to FileSystem state, with a return value of false. + +```python +FS' = FS; result = False +``` + + +*Local Filesystem* + +The outcome is as a normal rename, with the additional (implicit) feature +that the parent directories of the destination also exist. + +```python +exists(FS', parent(dest)) +``` + + +*Other Filesystems (including Swift) * + +Other filesystems strictly reject the operation, raising a `FileNotFoundException` + +##### Concurrency requirements + +* The core operation of `rename/2`, moving one entry in the filesystem to +another, MUST be atomic. Many applications rely on this as a way to commit operations. + +* Some FileSystem implementations perform checks on the destination +FileSystem before and after the rename. One example of this is `ChecksumFileSystem`, which +provides checksummed access to local data. The entire sequence MAY NOT be atomic. + +##### Implementation Notes + +**Files open for reading, writing or appending** + +The behavior of `rename/2` on an open file is unspecified: whether it is +allowed, what happens to later attempts to read from or write to the open stream + +**Renaming a directory onto itself** + +The return code of renaming a directory onto itself is unspecified. + +**Destination exists and is a file** + +Renaming a file atop an existing file is specified as failing, raising an exception. + +* Local FileSystem : the rename succeeds; the destination file is replaced by the source file. + +* HDFS : The rename fails, no exception is raised. Instead the method call simply returns false. + +**Missing source file** + +If the source file `source` does not exist, `FileNotFoundException` should be raised. + +HDFS fails without raising an exception; `rename()` merely returns false. + + FS' = FS + result = false + +The behavior of HDFS here must not be considered a feature to replicate. + + +## References + +- [Posix `rename()`](https://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html) + +- [rename(2) — Linux manual page](https://man7.org/linux/man-pages/man2/rename.2.html) + +> If newpath already exists, it will be atomically replaced, so that +there is no point at which another process attempting to access +newpath will find it missing. However, there will probably be a +window in which both oldpath and newpath refer to the file being +renamed. + +Linux extends the Posix arity-2 rename with their own rename/3 API call, one which blocks +overwriting. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FSMainOperationsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FSMainOperationsBaseTest.java index f0c00c4cdeef8..85cab43f44186 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FSMainOperationsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FSMainOperationsBaseTest.java @@ -32,7 +32,16 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.eclipse.jetty.util.log.Log; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.junit.Assume.assumeTrue; /** *

@@ -49,7 +58,7 @@ * Since this a junit 4 you can also do a single setup before * the start of any tests. * E.g. - * @BeforeClass public static void clusterSetupAtBegining() + * @BeforeClass public static void clusterSetupAtBeginning() * @AfterClass public static void ClusterShutdownAtEnd() *

*/ @@ -79,9 +88,9 @@ public boolean accept(Path file) { return true; else return false; - } + } }; - + protected static final byte[] data = getFileData(numBlocks, getDefaultBlockSize()); @@ -96,31 +105,54 @@ public FSMainOperationsBaseTest(String testRootDir) { @Before public void setUp() throws Exception { - fSys = createFileSystem(); - fSys.mkdirs(getTestRootPath(fSys, "test")); + checkNotNull(testRootDir, "testRootDir"); + fSys = checkNotNull(createFileSystem(), "filesystem"); + fSys.mkdirs(checkNotNull(getTestRootPath(fSys, "test"))); } @After public void tearDown() throws Exception { - fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true); + if (fSys != null) { + fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true); + } } - - + protected Path getDefaultWorkingDirectory() throws IOException { return getTestRootPath(fSys, "/user/" + System.getProperty("user.name")).makeQualified( fSys.getUri(), fSys.getWorkingDirectory()); } + /** + * Override point: is rename supported. + * @return true if rename is supported. + */ protected boolean renameSupported() { return true; } - - protected IOException unwrapException(IOException e) { - return e; + /** + * Skip the test if rename is not supported. + */ + protected void assumeRenameSupported() { + assumeTrue("Rename is not supported", renameSupported()); } - + + /** + * Override point: are posix-style permissions supported. + * @return true if permissions are supported. + */ + protected boolean permissionsSupported() { + return true; + } + + /** + * Skip the test if permissions are not supported. + */ + protected void assumePermissionsSupported() { + assumeTrue("Permissions are not supported", permissionsSupported()); + } + @Test public void testFsStatus() throws Exception { FsStatus fsStatus = fSys.getStatus(null); @@ -254,19 +286,19 @@ public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception { } @Test - public void testGetFileStatusThrowsExceptionForNonExistentFile() - throws Exception { + public void testGetFileStatusThrowsExceptionForNonExistentFile() + throws Exception { try { fSys.getFileStatus(getTestRootPath(fSys, "test/hadoop/file")); Assert.fail("Should throw FileNotFoundException"); } catch (FileNotFoundException e) { // expected } - } + } @Test public void testListStatusThrowsExceptionForNonExistentFile() - throws Exception { + throws Exception { try { fSys.listStatus(getTestRootPath(fSys, "test/hadoop/file")); Assert.fail("Should throw FileNotFoundException"); @@ -278,6 +310,7 @@ public void testListStatusThrowsExceptionForNonExistentFile() @Test public void testListStatusThrowsExceptionForUnreadableDir() throws Exception { + assumePermissionsSupported(); Path testRootDir = getTestRootPath(fSys, "test/hadoop/dir"); Path obscuredDir = new Path(testRootDir, "foo"); Path subDir = new Path(obscuredDir, "bar"); //so foo is non-empty @@ -633,6 +666,7 @@ public void testGlobStatusFilterWithMultiplePathWildcardsAndNonTrivialFilter() @Test public void testGlobStatusThrowsExceptionForUnreadableDir() throws Exception { + assumePermissionsSupported(); Path testRootDir = getTestRootPath(fSys, "test/hadoop/dir"); Path obscuredDir = new Path(testRootDir, "foo"); Path subDir = new Path(obscuredDir, "bar"); //so foo is non-empty @@ -681,9 +715,9 @@ protected void writeReadAndDelete(int len) throws IOException { fSys.mkdirs(path.getParent()); - FSDataOutputStream out = + FSDataOutputStream out = fSys.create(path, false, 4096, (short) 1, getDefaultBlockSize() ); - out.write(data, 0, len); + out.write(data, 0, len); out.close(); Assert.assertTrue("Exists", exists(fSys, path)); @@ -691,7 +725,7 @@ protected void writeReadAndDelete(int len) throws IOException { FSDataInputStream in = fSys.open(path); byte[] buf = new byte[len]; - in.readFully(0, buf); + in.readFully(0, buf); in.close(); Assert.assertEquals(len, buf.length); @@ -715,18 +749,18 @@ public void testOverwrite() throws IOException { Assert.assertTrue("Exists", exists(fSys, path)); Assert.assertEquals("Length", data.length, fSys.getFileStatus(path).getLen()); - + try { createFile(path); Assert.fail("Should throw IOException."); } catch (IOException e) { // Expected } - + FSDataOutputStream out = fSys.create(path, true, 4096); - out.write(data, 0, data.length); + out.write(data, 0, data.length); out.close(); - + Assert.assertTrue("Exists", exists(fSys, path)); Assert.assertEquals("Length", data.length, fSys.getFileStatus(path).getLen()); @@ -772,7 +806,7 @@ public void testDeleteRecursively() throws IOException { Assert.assertTrue("File still exists", exists(fSys, file)); Assert.assertTrue("Dir still exists", exists(fSys, dir)); Assert.assertTrue("Subdir still exists", exists(fSys, subdir)); - + Assert.assertTrue("Deleted", fSys.delete(dir, true)); Assert.assertFalse("File doesn't exist", exists(fSys, file)); Assert.assertFalse("Dir doesn't exist", exists(fSys, dir)); @@ -790,74 +824,46 @@ public void testDeleteEmptyDirectory() throws IOException { @Test public void testRenameNonExistentPath() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/nonExistent"); Path dst = getTestRootPath(fSys, "test/new/newpath"); - try { - rename(src, dst, false, false, false, Rename.NONE); - Assert.fail("Should throw FileNotFoundException"); - } catch (IOException e) { - Log.getLog().info("XXX", e); - Assert.assertTrue(unwrapException(e) instanceof FileNotFoundException); - } - - try { - rename(src, dst, false, false, false, Rename.OVERWRITE); - Assert.fail("Should throw FileNotFoundException"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileNotFoundException); - } + intercept(FileNotFoundException.class, () -> + rename(src, dst, false, false, false, Rename.NONE)); + intercept(FileNotFoundException.class, () -> + rename(src, dst, false, false, false, Rename.OVERWRITE)); } @Test public void testRenameFileToNonExistentDirectory() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); Path dst = getTestRootPath(fSys, "test/nonExistent/newfile"); - - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileNotFoundException); - } - - try { - rename(src, dst, false, true, false, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileNotFoundException); - } + intercept(IOException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); + intercept(IOException.class, () -> + rename(src, dst, false, true, false, Rename.OVERWRITE)); } @Test public void testRenameFileToDestinationWithParentFile() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); Path dst = getTestRootPath(fSys, "test/parentFile/newfile"); createFile(dst.getParent()); - - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - } - - try { - rename(src, dst, false, true, false, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - } + intercept(IOException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); + intercept(IOException.class, () -> + rename(src, dst, false, true, false, Rename.OVERWRITE)); } @Test public void testRenameFileToExistingParent() throws Exception { - if (!renameSupported()) return; - + assumeRenameSupported(); + Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); Path dst = getTestRootPath(fSys, "test/new/newfile"); @@ -867,116 +873,77 @@ public void testRenameFileToExistingParent() throws Exception { @Test public void testRenameFileToItself() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); + Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); - try { - rename(src, src, false, true, false, Rename.NONE); - Assert.fail("Renamed file to itself"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileAlreadyExistsException); - } + intercept(FileAlreadyExistsException.class, () -> + rename(src, src, false, true, false, Rename.NONE)); // Also fails with overwrite - try { - rename(src, src, false, true, false, Rename.OVERWRITE); - Assert.fail("Renamed file to itself"); - } catch (IOException e) { - // worked - } + intercept(IOException.class, () -> + rename(src, src, false, true, false, Rename.OVERWRITE)); } - + @Test public void testRenameFileAsExistingFile() throws Exception { - if (!renameSupported()) return; - + assumeRenameSupported(); + Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); Path dst = getTestRootPath(fSys, "test/new/existingFile"); createFile(dst); // Fails without overwrite option - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileAlreadyExistsException); - } - + intercept(FileAlreadyExistsException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); + // Succeeds with overwrite option rename(src, dst, true, false, true, Rename.OVERWRITE); } @Test public void testRenameFileAsExistingDirectory() throws Exception { - if (!renameSupported()) return; - + assumeRenameSupported(); + Path src = getTestRootPath(fSys, "test/hadoop/file"); createFile(src); Path dst = getTestRootPath(fSys, "test/new/existingDir"); fSys.mkdirs(dst); // Fails without overwrite option - try { - rename(src, dst, false, false, true, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - } - + intercept(IOException.class, () -> + rename(src, dst, false, false, true, Rename.NONE)); + + // File cannot be renamed as directory - try { - rename(src, dst, false, false, true, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - } + intercept(IOException.class, () -> + rename(src, dst, false, false, true, Rename.OVERWRITE)); } @Test public void testRenameDirectoryToItself() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); - try { - rename(src, src, false, true, false, Rename.NONE); - Assert.fail("Renamed directory to itself"); - } catch (IOException e) { - Assert.assertTrue(unwrapException(e) instanceof FileAlreadyExistsException); - } + intercept(FileAlreadyExistsException.class, () -> + rename(src, src, false, true, false, Rename.NONE)); // Also fails with overwrite - try { - rename(src, src, false, true, false, Rename.OVERWRITE); - Assert.fail("Renamed directory to itself"); - } catch (IOException e) { - // worked - } + intercept(IOException.class, () -> + rename(src, src, false, true, false, Rename.OVERWRITE)); } @Test public void testRenameDirectoryToNonExistentParent() throws Exception { - if (!renameSupported()) return; - + assumeRenameSupported(); + Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); Path dst = getTestRootPath(fSys, "test/nonExistent/newdir"); - - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - IOException ioException = unwrapException(e); - if (!(ioException instanceof FileNotFoundException)) { - throw ioException; - } - } + intercept(FileNotFoundException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); - try { - rename(src, dst, false, true, false, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - IOException ioException = unwrapException(e); - if (!(ioException instanceof FileNotFoundException)) { - throw ioException; - } - } + intercept(FileNotFoundException.class, () -> + rename(src, dst, false, true, false, Rename.OVERWRITE)); } @Test @@ -988,7 +955,7 @@ public void testRenameDirectoryAsNonExistentDirectory() throws Exception { private void doTestRenameDirectoryAsNonExistentDirectory(Rename... options) throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); @@ -999,19 +966,19 @@ private void doTestRenameDirectoryAsNonExistentDirectory(Rename... options) fSys.mkdirs(dst.getParent()); rename(src, dst, true, false, true, options); - Assert.assertFalse("Nested file1 exists", + Assert.assertFalse("Nested file1 exists", exists(fSys, getTestRootPath(fSys, "test/hadoop/dir/file1"))); - Assert.assertFalse("Nested file2 exists", + Assert.assertFalse("Nested file2 exists", exists(fSys, getTestRootPath(fSys, "test/hadoop/dir/subdir/file2"))); Assert.assertTrue("Renamed nested file1 exists", exists(fSys, getTestRootPath(fSys, "test/new/newdir/file1"))); - Assert.assertTrue("Renamed nested exists", + Assert.assertTrue("Renamed nested exists", exists(fSys, getTestRootPath(fSys, "test/new/newdir/subdir/file2"))); } @Test public void testRenameDirectoryAsEmptyDirectory() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); @@ -1022,20 +989,15 @@ public void testRenameDirectoryAsEmptyDirectory() throws Exception { fSys.mkdirs(dst); // Fails without overwrite option - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - // Expected (cannot over-write non-empty destination) - Assert.assertTrue(unwrapException(e) instanceof FileAlreadyExistsException); - } + intercept(FileAlreadyExistsException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); // Succeeds with the overwrite option rename(src, dst, true, false, true, Rename.OVERWRITE); } @Test public void testRenameDirectoryAsNonEmptyDirectory() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); @@ -1046,42 +1008,27 @@ public void testRenameDirectoryAsNonEmptyDirectory() throws Exception { fSys.mkdirs(dst); createFile(getTestRootPath(fSys, "test/new/newdir/file1")); // Fails without overwrite option - try { - rename(src, dst, false, true, false, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - // Expected (cannot over-write non-empty destination) - Assert.assertTrue(unwrapException(e) instanceof FileAlreadyExistsException); - } + intercept(FileAlreadyExistsException.class, () -> + rename(src, dst, false, true, false, Rename.NONE)); // Fails even with the overwrite option - try { - rename(src, dst, false, true, false, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException ex) { - // Expected (cannot over-write non-empty destination) - } + intercept(IOException.class, () -> + rename(src, dst, false, true, false, Rename.OVERWRITE)); } @Test public void testRenameDirectoryAsFile() throws Exception { - if (!renameSupported()) return; + assumeRenameSupported(); Path src = getTestRootPath(fSys, "test/hadoop/dir"); fSys.mkdirs(src); Path dst = getTestRootPath(fSys, "test/new/newfile"); createFile(dst); // Fails without overwrite option - try { - rename(src, dst, false, true, true, Rename.NONE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException e) { - } + intercept(IOException.class, () -> + rename(src, dst, false, true, true, Rename.NONE)); // Directory cannot be renamed as existing file - try { - rename(src, dst, false, true, true, Rename.OVERWRITE); - Assert.fail("Expected exception was not thrown"); - } catch (IOException ex) { - } + intercept(IOException.class, () -> + rename(src, dst, false, true, true, Rename.OVERWRITE)); } @Test @@ -1115,7 +1062,7 @@ public void testGetWrappedInputStream() throws IOException { FSDataInputStream in = fSys.open(src); InputStream is = in.getWrappedStream(); in.close(); - Assert.assertNotNull(is); + Assert.assertNotNull(is); } @Test @@ -1156,7 +1103,7 @@ private void rename(Path src, Path dst, boolean renameShouldSucceed, Assert.fail("rename should have thrown exception"); Assert.assertEquals("Source exists", srcExists, exists(fSys, src)); Assert.assertEquals("Destination exists", dstExists, exists(fSys, dst)); - } + } private boolean containsTestRootPath(Path path, FileStatus[] filteredPaths) throws IOException { Path testRootPath = getTestRootPath(fSys, path.toString()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java index 8050ce6b4427d..ae83a4b9e8b7e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; @@ -41,7 +42,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.CompletableFuture; import static org.apache.hadoop.fs.Options.ChecksumOpt; @@ -64,6 +64,8 @@ private interface MustNotImplement { public long getLength(Path f); public FSDataOutputStream append(Path f, int bufferSize); public void rename(Path src, Path dst, Rename... options); + + FileSystemRename3Action.RenameCallbacks createRenameCallbacks(); public boolean exists(Path f); public boolean isDirectory(Path f); public boolean isFile(Path f); @@ -251,6 +253,7 @@ CompletableFuture openFileWithOptions( MultipartUploaderBuilder createMultipartUploader(Path basePath) throws IOException; + } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameExTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameExTest.java new file mode 100644 index 0000000000000..378e3f73579d6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameExTest.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.junit.Test; + +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.assertListStatusFinds; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.rm; +import static org.apache.hadoop.fs.contract.ContractTestUtils.verifyFileContents; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test the rename(path, path, options) operations. + */ +public abstract class AbstractContractRenameExTest extends + AbstractFSContractTestBase { + + protected void renameEx( + final Path src, + final Path dst) + throws IOException { + getFileSystem().rename(src, dst, Options.Rename.NONE); + } + + protected void renameEx( + final Path src, + final Path dst, + final Options.Rename... options) + throws IOException { + getFileSystem().rename(src, dst, options); + } + + @Test + public void testRenameNewFileSameDir() throws Throwable { + describe("rename a file into a new file in the same directory"); + Path renameSrc = path("rename_src"); + Path renameTarget = path("rename_dest"); + byte[] data = dataset(256, 'a', 'z'); + writeDataset(getFileSystem(), renameSrc, + data, data.length, 1024 * 1024, false); + boolean rename = rename(renameSrc, renameTarget); + assertTrue("rename("+renameSrc+", "+ renameTarget+") returned false", + rename); + assertListStatusFinds(getFileSystem(), + renameTarget.getParent(), renameTarget); + verifyFileContents(getFileSystem(), renameTarget, data); + } + + @Test + public void testRenameNonexistentFile() throws Throwable { + describe("rename a file which does not exist"); + final Path missing = path("testRenameNonexistentFileSrc"); + final Path target = path("testRenameNonexistentFileDest"); + mkdirs(missing.getParent()); + IOException e = intercept(IOException.class, + () -> renameEx(missing, target)); + if (e instanceof FileNotFoundException) { + handleExpectedException(e); + } else { + handleRelaxedException("rename nonexistent file", + "FileNotFoundException", e); + } + assertPathDoesNotExist("rename nonexistent file created a destination file", + target); + } + + /** + * Rename test -handles filesystems that will overwrite the destination + * as well as those that do not (i.e. HDFS). + */ + @Test + public void testRenameFileOverExistingFile() throws Throwable { + describe("Verify renaming a file onto an existing file matches expectations"); + final Path srcFile = path("source-256.txt"); + final byte[] srcData = dataset(256, 'a', 'z'); + writeDataset(getFileSystem(), srcFile, srcData, srcData.length, 1024, false); + final Path destFile = path("dest-512.txt"); + final byte[] destData = dataset(512, 'A', 'Z'); + writeDataset(getFileSystem(), destFile, destData, destData.length, 1024, false); + assertIsFile(destFile); + IOException e = intercept(IOException.class, + () -> renameEx(srcFile, destFile)); + if (e instanceof FileAlreadyExistsException) { + handleExpectedException(e); + } else { + handleRelaxedException("rename over file", + "FileAlreadyExistsException", e); + } + // verify that the destination file is as expected based on the expected + // outcome + verifyFileContents(getFileSystem(), destFile, destData); + verifyFileContents(getFileSystem(), srcFile, srcData); + // now rename with overwrite + renameEx(srcFile, destFile, Options.Rename.OVERWRITE); + verifyFileContents(getFileSystem(), destFile, srcData); + assertPathDoesNotExist("Source still found", srcFile); + } + + /** + * Rename a file onto itself. + */ + @Test + public void testRenameFileOverSelf() throws Throwable { + describe("Verify renaming a file onto itself does not lose the data"); + final Path srcFile = path("source-256.txt"); + final byte[] srcData = dataset(256, 'a', 'z'); + writeDataset(getFileSystem(), srcFile, srcData, srcData.length, 1024, false); + intercept(FileAlreadyExistsException.class, + () -> renameEx(srcFile, srcFile)); + verifyFileContents(getFileSystem(), srcFile, srcData); + intercept(FileAlreadyExistsException.class, + () -> renameEx(srcFile, srcFile, Options.Rename.OVERWRITE)); + verifyFileContents(getFileSystem(), srcFile, srcData); + } + + /** + * Rename a dir onto itself. + */ + @Test + public void testRenameDirOverSelf() throws Throwable { + final Path src = path("testRenameDirOverSelf"); + getFileSystem().mkdirs(src); + intercept(FileAlreadyExistsException.class, + () -> renameEx(src, src)); + assertIsDirectory(src ); + } + + @Test + public void testRenameDirIntoExistingDir() throws Throwable { + describe("Verify renaming a dir into an existing dir puts it underneath" + +" and leaves existing files alone"); + FileSystem fs = getFileSystem(); + String sourceSubdir = "source"; + Path srcDir = path(sourceSubdir); + String sourceName = "source-256.txt"; + Path srcFilePath = new Path(srcDir, sourceName); + byte[] srcDataset = dataset(256, 'a', 'z'); + writeDataset(fs, srcFilePath, srcDataset, srcDataset.length, 1024, false); + Path destDir = path("dest"); + + Path destFilePath = new Path(destDir, "dest-512.txt"); + byte[] destDateset = dataset(512, 'A', 'Z'); + writeDataset(fs, destFilePath, destDateset, destDateset.length, 1024, false); + assertIsFile(destFilePath); + // no overwrite: fail + intercept(FileAlreadyExistsException.class, + () -> renameEx(srcDir, destDir)); + // overwrite fails if dest dir has a file + intercept(IOException.class, + () -> renameEx(srcDir, destDir, Options.Rename.OVERWRITE)); + // overwrite is good + // delete the dest file and all will be well + assertDeleted(destFilePath, false); + renameEx(srcDir, destDir, Options.Rename.OVERWRITE); + verifyFileContents(fs, new Path(destDir, sourceName), srcDataset); + assertPathDoesNotExist("Source still found", srcFilePath); + } + + @Test + public void testRenameFileNonexistentDir() throws Throwable { + describe("rename a file into a nonexistent directory"); + final Path renameSrc = path("testRenameSrc"); + final Path renameTarget = path("subdir/testRenameTarget"); + byte[] data = dataset(256, 'a', 'z'); + writeDataset(getFileSystem(), renameSrc, data, data.length, 1024 * 1024, + false); + intercept(FileNotFoundException.class, + () -> renameEx(renameSrc, renameTarget)); + verifyFileContents(getFileSystem(), renameSrc, data); + } + + @Test + public void testRenameWithNonEmptySubDir() throws Throwable { + final Path renameTestDir = path("testRenameWithNonEmptySubDir"); + final Path srcDir = new Path(renameTestDir, "src1"); + final Path srcSubDir = new Path(srcDir, "sub"); + final Path finalDir = new Path(renameTestDir, "dest"); + FileSystem fs = getFileSystem(); + boolean renameRemoveEmptyDest = isSupported(RENAME_REMOVE_DEST_IF_EMPTY_DIR); + rm(fs, renameTestDir, true, false); + + fs.mkdirs(srcDir); + fs.mkdirs(finalDir); + String sourceTextName = "source.txt"; + Path sourceTextPath = new Path(srcDir, sourceTextName); + writeTextFile(fs, sourceTextPath, + "this is the file in src dir", false); + Path subfileTxt = new Path(srcSubDir, "subfile.txt"); + writeTextFile(fs, subfileTxt, + "this is the file in src/sub dir", false); + + assertPathExists("not created in src dir", + sourceTextPath); + assertPathExists("not created in src/sub dir", + subfileTxt); + + // no overwrite: fail + intercept(FileAlreadyExistsException.class, + () -> renameEx(srcDir, finalDir)); + // now overwrite + renameEx(srcDir, finalDir, Options.Rename.OVERWRITE); + // Accept both POSIX rename behavior and CLI rename behavior + // POSIX rename behavior + assertPathExists("not renamed into dest dir", + new Path(finalDir, sourceTextName)); + assertPathExists("not renamed into dest/sub dir", + new Path(finalDir, "sub/subfile.txt")); + assertPathDoesNotExist("not deleted", + sourceTextPath); + } + + /** + * Test that after renaming, the nested subdirectory is moved along with all + * its ancestors. + */ + @Test + public void testRenamePopulatesDirectoryAncestors() throws IOException { + final FileSystem fs = getFileSystem(); + final Path src = path("testRenamePopulatesDirectoryAncestors/source"); + fs.mkdirs(src); + final String nestedDir = "/dir1/dir2/dir3/dir4"; + fs.mkdirs(path(src + nestedDir)); + + Path dst = path("testRenamePopulatesDirectoryAncestorsNew"); + + renameEx(src, dst); + validateAncestorsMoved(src, dst, nestedDir); + } + + /** + * Test that after renaming, the nested file is moved along with all its + * ancestors. It is similar to {@link #testRenamePopulatesDirectoryAncestors}. + */ + @Test + public void testRenamePopulatesFileAncestors() throws IOException { + final FileSystem fs = getFileSystem(); + final Path src = path("testRenamePopulatesFileAncestors/source"); + fs.mkdirs(src); + final String nestedFile = "/dir1/dir2/dir3/file4"; + byte[] srcDataset = dataset(256, 'a', 'z'); + writeDataset(fs, path(src + nestedFile), srcDataset, srcDataset.length, + 1024, false); + + Path dst = path("testRenamePopulatesFileAncestorsNew"); + + renameEx(src, dst); + validateAncestorsMoved(src, dst, nestedFile); + } + + /** + * Validate that the nested path and its ancestors should have been moved. + * + * @param src the source root to move + * @param dst the destination root to move + * @param nestedPath the nested path to move + */ + private void validateAncestorsMoved(Path src, Path dst, String nestedPath) + throws IOException { + assertIsDirectory(dst); + assertPathDoesNotExist("src path should not exist", path(src + nestedPath)); + assertPathExists("dst path should exist", path(dst + nestedPath)); + + Path path = new Path(nestedPath).getParent(); + while (path != null && !path.isRoot()) { + final Path parentSrc = path(src + path.toString()); + assertPathDoesNotExist(parentSrc + " is not deleted", parentSrc); + final Path parentDst = path(dst + path.toString()); + assertPathExists(parentDst + " should exist after rename", parentDst); + assertIsDirectory(parentDst); + path = path.getParent(); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java index 78ff2541483a3..f5ac066982871 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java @@ -52,7 +52,7 @@ public void testRenameNewFileSameDir() throws Throwable { @Test public void testRenameNonexistentFile() throws Throwable { - describe("rename a file into a new file in the same directory"); + describe("rename a file which does not exist"); Path missing = path("testRenameNonexistentFileSrc"); Path target = path("testRenameNonexistentFileDest"); boolean renameReturnsFalseOnFailure = @@ -161,7 +161,7 @@ public void testRenameDirIntoExistingDir() throws Throwable { @Test public void testRenameFileNonexistentDir() throws Throwable { - describe("rename a file into a new file in the same directory"); + describe("rename a file into a nonexistent directory"); Path renameSrc = path("testRenameSrc"); Path renameTarget = path("subdir/testRenameTarget"); byte[] data = dataset(256, 'a', 'z'); @@ -182,6 +182,7 @@ public void testRenameFileNonexistentDir() throws Throwable { // allowed unless that rename flag is set assertFalse(renameCreatesDestDirs); } + assertPathDoesNotExist("Source still found", renameSrc); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractRenameEx.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractRenameEx.java new file mode 100644 index 0000000000000..8130456ade653 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractRenameEx.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.localfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameExTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test the local FS supports renameEx semantics. + * This implicitly requires the checksums to be copied too. + */ +public class TestLocalFSContractRenameEx extends AbstractContractRenameExTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawlocalContractRenameEx.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawlocalContractRenameEx.java new file mode 100644 index 0000000000000..588e8592804cd --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawlocalContractRenameEx.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.rawlocal; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameExTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test the raw local FS supports renameEx semantics. + */ +public class TestRawlocalContractRenameEx extends AbstractContractRenameExTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new RawlocalFSContract(conf); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 25e7f7373226b..f1aeb38354ad9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -1171,7 +1171,6 @@ public boolean rename(final Path src, final Path dst) throws IOException { ).run(); } - @SuppressWarnings("deprecation") @Override public void rename(final Path src, final Path dst, final Options.Rename... options) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java index 43dd1b166a7a4..4d16d1a0a451b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirRenameOp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import com.google.common.base.Preconditions; + import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.InvalidPathException; @@ -43,6 +44,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; + +import static org.apache.hadoop.fs.FSExceptionMessages.*; import static org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException; import static org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException; @@ -376,13 +379,15 @@ static RenameResult unprotectedRenameTo(FSDirectory fsd, // validate the destination if (dst.equals(src)) { - throw new FileAlreadyExistsException("The source " + src + - " and destination " + dst + " are the same"); + throw new FileAlreadyExistsException( + String.format(RENAME_DEST_EQUALS_SOURCE, + src , dst)); } validateDestination(src, dst, srcInode); if (dstIIP.length() == 1) { - error = "rename destination cannot be the root"; + error = RENAME_DEST_IS_ROOT; + NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new IOException(error); @@ -399,13 +404,13 @@ static RenameResult unprotectedRenameTo(FSDirectory fsd, INode dstParent = dstIIP.getINode(-2); if (dstParent == null) { - error = "rename destination parent " + dst + " not found."; + error = String.format(RENAME_DEST_NO_PARENT_OF, dst); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new FileNotFoundException(error); } if (!dstParent.isDirectory()) { - error = "rename destination parent " + dst + " is a file."; + error = String.format(RENAME_DEST_PARENT_NOT_DIRECTORY, dst); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new ParentNotDirectoryException(error); @@ -531,8 +536,7 @@ private static void validateDestination( // dst cannot be a directory or a file under src if (dst.startsWith(src) && dst.charAt(src.length()) == Path.SEPARATOR_CHAR) { - error = "Rename destination " + dst - + " is a directory or file under source " + src; + error = String.format(RENAME_DEST_UNDER_SOURCE, dst, src); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new IOException(error); @@ -540,7 +544,8 @@ private static void validateDestination( if (FSDirectory.isExactReservedName(src) || FSDirectory.isExactReservedName(dst)) { - error = "Cannot rename to or from /.reserved"; + error = String.format("Cannot rename to or from reserved path;" + + " source =\"%s\", dest=\"%s\"", src, dst); throw new InvalidPathException(error); } } @@ -550,14 +555,13 @@ private static void validateOverwrite( throws IOException { String error;// It's OK to rename a file to a symlink and vice versa if (dstInode.isDirectory() != srcInode.isDirectory()) { - error = "Source " + src + " and destination " + dst - + " must both be directories"; + error = String.format(RENAME_SOURCE_DEST_DIFFERENT_TYPE, src, dst); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new IOException(error); } if (!overwrite) { // If destination exists, overwrite flag must be true - error = "rename destination " + dst + " already exists"; + error = String.format(RENAME_DEST_EXISTS, dst); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new FileAlreadyExistsException(error); @@ -566,7 +570,7 @@ private static void validateOverwrite( final ReadOnlyList children = dstInode.asDirectory() .getChildrenList(Snapshot.CURRENT_STATE_ID); if (!children.isEmpty()) { - error = "rename destination directory is not empty: " + dst; + error = String.format(RENAME_DEST_NOT_EMPTY, dst); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new IOException(error); @@ -581,13 +585,13 @@ private static void validateRenameSource(FSDirectory fsd, final INode srcInode = srcIIP.getLastINode(); // validate source if (srcInode == null) { - error = "rename source " + srcIIP.getPath() + " is not found."; + error = String.format(RENAME_SOURCE_NOT_FOUND, srcIIP.getPath()); NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new FileNotFoundException(error); } if (srcIIP.length() == 1) { - error = "rename source cannot be the root"; + error = RENAME_SOURCE_IS_ROOT; NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: " + error); throw new IOException(error); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractRenameEx.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractRenameEx.java new file mode 100644 index 0000000000000..1c2b4fe3b2e73 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractRenameEx.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.hdfs; + +import java.io.IOException; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameExTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test that HDFS supports renameEx semantics. + */ +public class TestHDFSContractRenameEx extends AbstractContractRenameExTest { + + @BeforeClass + public static void createCluster() throws IOException { + HDFSContract.createCluster(); + } + + @AfterClass + public static void teardownCluster() throws IOException { + HDFSContract.destroyCluster(); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new HDFSContract(conf); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 63c80bdd067e1..52c4d8cf6ac80 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -94,8 +94,11 @@ import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Globber; +import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.impl.OpenFileParameters; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; import org.apache.hadoop.fs.s3a.auth.SignerManager; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider; @@ -112,6 +115,7 @@ import org.apache.hadoop.fs.s3a.impl.OperationCallbacks; import org.apache.hadoop.fs.s3a.impl.RenameOperation; import org.apache.hadoop.fs.s3a.impl.S3AMultipartUploaderBuilder; +import org.apache.hadoop.fs.s3a.impl.S3ARenameCallbacks; import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.impl.StoreContextBuilder; @@ -168,6 +172,7 @@ import org.apache.hadoop.util.SemaphoredDelegatingExecutor; import org.apache.hadoop.util.concurrent.HadoopExecutors; +import static org.apache.hadoop.fs.FSExceptionMessages.RENAME_DEST_PARENT_NOT_DIRECTORY; import static org.apache.hadoop.fs.impl.AbstractFSBuilderImpl.rejectUnknownMandatoryKeys; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; import static org.apache.hadoop.fs.s3a.Constants.*; @@ -1347,7 +1352,6 @@ public FSDataOutputStream append(Path f, int bufferSize, + "by S3AFileSystem"); } - /** * Renames Path src to Path dst. Can take place on local fs * or remote DFS. @@ -1526,6 +1530,54 @@ private long innerRename(Path source, Path dest) return renameOperation.execute(); } + /** + * + * @param source path to be renamed + * @param dest new path after rename + * @param options rename options. + * @throws IOException + */ + @Override + public void rename(final Path source, + final Path dest, + final Options.Rename... options) throws IOException { + Path src = qualify(source); + Path dst = qualify(dest); + + LOG.debug("Rename path {} to {}", src, dst); + entryPoint(INVOCATION_RENAME); + super.rename(source, dest, options); + } + + /** + * Return the S3A rename callbacks; mocking tests + * may wish to override. + */ + protected FileSystemRename3Action.RenameCallbacks createRenameCallbacks() { + return new S3ARenameCallbacks( + createStoreContext(), + operationCallbacks, + pageSize, + new S3ARenameCallbacks.Probes() { + @Override + public S3AFileStatus stat(final Path f, + final boolean needEmptyDirectoryFlag, + final Set probes) throws IOException { + return innerGetFileStatus(f, needEmptyDirectoryFlag, probes); + } + + @Override + public boolean isDir(final Path path) throws IOException { + return isDirectory(path); + } + + @Override + public boolean dirHasChildren(final Path path) throws IOException { + return listStatusIterator(path).hasNext(); + } + }); + } + @Override public Token getFsDelegationToken() throws IOException { return getDelegationToken(null); @@ -2650,6 +2702,7 @@ void maybeCreateFakeParentDirectory(Path path) * @throws FileNotFoundException when the path does not exist; * IOException see specific implementation */ + @Retries.RetryTranslated public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { return once("listStatus", f.toString(), () -> innerListStatus(f)); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3ARenameCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3ARenameCallbacks.java new file mode 100644 index 0000000000000..c95486a565968 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3ARenameCallbacks.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.ParentNotDirectoryException; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.impl.FileSystemRename3Action; +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.Tristate; + +import static org.apache.hadoop.fs.FSExceptionMessages.RENAME_DEST_PARENT_NOT_DIRECTORY; +import static org.apache.hadoop.fs.s3a.Invoker.once; + +/** + * This is the S3A rename/3 support; which calls back into + * the S3A FS for some actions, and delegates the rest to + * a delete and a rename operation. + *

+ * The FileStatus references returned are always {@link S3AFileStatus} + * instances; that is relied upon in + * {@link #executeRename(FileSystemRename3Action.ExecuteRenameParams)}. + */ +public class S3ARenameCallbacks extends AbstractStoreOperation implements + FileSystemRename3Action.RenameCallbacks { + + private static final Logger LOG = LoggerFactory.getLogger( + org.apache.hadoop.fs.s3a.impl.S3ARenameCallbacks.class); + + /** + * operations callbacks to hand off to delete and rename. + */ + private final OperationCallbacks operationCallbacks; + + /** + * Extra FS probes needed here. + */ + private final Probes probes; + + /** + * Number of entries in a delete page. + */ + private final int pageSize; + + /** + * Constructor. + * @param storeContext store + * @param operationCallbacks callbacks for delete and rename. + * @param pageSize Number of entries in a delete page. + * @param probes Extra FS probes needed here. + */ + public S3ARenameCallbacks( + final StoreContext storeContext, + final OperationCallbacks operationCallbacks, + final int pageSize, final Probes probes) { + super(storeContext); + this.operationCallbacks = operationCallbacks; + this.probes = probes; + this.pageSize = pageSize; + } + + @Override + public void close() throws IOException { + + } + + @Override + @Retries.RetryTranslated + public S3AFileStatus getSourceStatus(final Path sourcePath) + throws FileNotFoundException, IOException { + return probes.stat(sourcePath, false, + StatusProbeEnum.ALL); + } + + @Override + @Retries.RetryTranslated + public S3AFileStatus getDestStatusOrNull(final Path destPath) + throws IOException { + try { + return probes.stat(destPath, true, + StatusProbeEnum.ALL); + } catch (FileNotFoundException e) { + return null; + } + } + + /** + * Optimized probe which looks at empty dir state of the + * directory passed in, if known. + * {@inheritDoc}. + */ + @Override + @Retries.RetryTranslated + public boolean directoryHasChildren(final FileStatus directory) + throws IOException { + if (directory instanceof S3AFileStatus) { + S3AFileStatus st = (S3AFileStatus) directory; + if (st.isEmptyDirectory() == Tristate.TRUE) { + return false; + } + if (st.isEmptyDirectory() == Tristate.FALSE) { + return true; + } + } + return probes.dirHasChildren(directory.getPath()); + } + /** + * Optimized probe which assumes the path is a parent, so + * checks directory status first. + * {@inheritDoc}. + */ + @Override + @Retries.RetryTranslated + public void verifyIsDirectory(final Path destParent) + throws ParentNotDirectoryException, IOException { + if (probes.isDir(destParent)) { + return; + } + // either nothing there, or it is a file. + // do a HEAD, raising FNFE if it is not there + probes.stat(destParent, false, StatusProbeEnum.FILE); + // if we get here, no exception was raised, therefore HEAD + // succeeded, therefore there is a file at the path. + throw new ParentNotDirectoryException( + String.format(RENAME_DEST_PARENT_NOT_DIRECTORY, destParent)); + } + + /** + * This is a highly optimized rename operation which + * avoids performing any duplicate metadata probes, + * as well as raising all failures as meaningful exceptions. + * {@inheritDoc}. + * @param params + */ + @Override + @Retries.RetryTranslated + public void executeRename( + final FileSystemRename3Action.ExecuteRenameParams params) + throws PathIOException, IOException { + + final S3AFileStatus sourceStatus = + (S3AFileStatus) params.getSourceStatus(); + final Path sourcePath = params.getSourcePath(); + final Path destPath = params.getDestPath(); + final String source = sourcePath.toString(); + final StoreContext context = getStoreContext(); + String srcKey = context.pathToKey(sourcePath); + String dstKey = context.pathToKey(destPath); + + + once("rename(" + source + ", " + + destPath + ")", source, + () -> { + final S3AFileStatus destStatus + = (S3AFileStatus) params.getDestStatus(); + if (params.isDeleteDest()) { + // Delete the file or directory marker + // (Which is all which can be there) + DeleteOperation deleteOperation = new DeleteOperation( + context, + destStatus, + false, + operationCallbacks, + pageSize); + deleteOperation.execute(); + } + + // Initiate the rename. + RenameOperation renameOperation = new RenameOperation( + context, + sourcePath, srcKey, (S3AFileStatus) sourceStatus, + destPath, dstKey, destStatus, + operationCallbacks, + pageSize); + long bytesCopied = renameOperation.execute(); + LOG.debug("Copied {} bytes", bytesCopied); + }); + } + + /** + * Callback for the operation. + */ + public interface Probes { + + S3AFileStatus stat(final Path f, + final boolean needEmptyDirectoryFlag, + final Set probes) throws IOException; + + boolean isDir(Path path) throws IOException; + + boolean dirHasChildren(Path path) throws IOException; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRenameEx.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRenameEx.java new file mode 100644 index 0000000000000..8a602521b0a8a --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRenameEx.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameExTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; + +/** + * S3A contract tests covering rename/3. + */ +public class ITestS3AContractRenameEx extends AbstractContractRenameExTest { + + /** + * Create a configuration, possibly patching in S3Guard options. + * @return a configuration + */ + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + // patch in S3Guard options + maybeEnableS3Guard(conf); + return conf; + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AFSMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AFSMainOperations.java new file mode 100644 index 0000000000000..2c98a153932ea --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AFSMainOperations.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSMainOperationsBaseTest; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.s3a.S3ATestUtils; + +import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; + +/** + * S3A version of the {@link FSMainOperationsBaseTest} test suite. + */ +public class ITestS3AFSMainOperations extends FSMainOperationsBaseTest { + + public ITestS3AFSMainOperations() { + super(S3ATestUtils.createTestPath( + new Path("/ITestS3AFSMainOperations")).toString()); + } + + @Override + public void setUp() throws Exception { + AbstractFSContract contract = createContract(createConfiguration()); + contract.init(); + //extract the test FS + fSys = contract.getTestFileSystem(); + super.setUp(); + } + + @Override + protected FileSystem createFileSystem() throws Exception { + return checkNotNull(fSys); + } + + /** + * Create a configuration, possibly patching in S3Guard options. + * @return a configuration + */ + private Configuration createConfiguration() { + Configuration conf = new Configuration(); + // patch in S3Guard options + maybeEnableS3Guard(conf); + return conf; + } + + private AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + /** + * Override point: are posix-style permissions supported. + * @return false + */ + @Override + protected boolean permissionsSupported() { + return false; + } + +}