diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java
index 9fc18ca3bbf92..7dd10359b42a0 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java
@@ -267,7 +267,11 @@ private static void doDiskIo(File dir) throws DiskErrorException {
           ioe = e;
         }
       }
-      throw ioe; // Just rethrow the last exception to signal failure.
+      // Throw the exception only if it's not about the disk being full.
+      if (ioe.getMessage() == null
+          || !ioe.getMessage().contains("No space left")) {
+        throw ioe; // Just rethrow the last exception to signal failure.
+      }
     } catch(IOException e) {
       throw new DiskErrorException("Error checking directory " + dir, e);
     }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDiskCheckerWithDiskIo.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDiskCheckerWithDiskIo.java
index 47dd154d96070..b91f8ced1e756 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDiskCheckerWithDiskIo.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDiskCheckerWithDiskIo.java
@@ -33,6 +33,7 @@
 
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -48,7 +49,7 @@ public final class TestDiskCheckerWithDiskIo {
   @Test
   public final void testDiskIoIgnoresTransientCreateErrors() throws Throwable {
     DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
-        DiskChecker.DISK_IO_MAX_ITERATIONS - 1, 0));
+        DiskChecker.DISK_IO_MAX_ITERATIONS - 1, 0, false));
     checkDirs(true);
   }
 
@@ -59,7 +60,7 @@ public final void testDiskIoIgnoresTransientCreateErrors() throws Throwable {
   public final void testDiskIoDetectsCreateErrors() throws Throwable {
     assertThrows(DiskErrorException.class, () -> {
       DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
-          DiskChecker.DISK_IO_MAX_ITERATIONS, 0));
+          DiskChecker.DISK_IO_MAX_ITERATIONS, 0, false));
       checkDirs(false);
     });
   }
@@ -70,7 +71,7 @@ public final void testDiskIoDetectsCreateErrors() throws Throwable {
   @Test
   public final void testDiskIoIgnoresTransientWriteErrors() throws Throwable {
     DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
-        0, DiskChecker.DISK_IO_MAX_ITERATIONS - 1));
+        0, DiskChecker.DISK_IO_MAX_ITERATIONS - 1, false));
     checkDirs(true);
   }
 
@@ -81,7 +82,7 @@ public final void testDiskIoIgnoresTransientWriteErrors() throws Throwable {
   public final void testDiskIoDetectsWriteErrors() throws Throwable {
     assertThrows(DiskErrorException.class, ()->{
       DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
-          0, DiskChecker.DISK_IO_MAX_ITERATIONS));
+          0, DiskChecker.DISK_IO_MAX_ITERATIONS, false));
       checkDirs(false);
     });
   }
@@ -104,6 +105,18 @@ public void testDiskIoFileNaming() {
         "File name does not match expected pattern: " + guidFile);
   }
 
+  /**
+   * Verify DiskChecker doesn't fail on ENOSPC errors.
+   */
+  @Test
+  public void testDiskIoIgnoresENOSPCWriteErrors() {
+    assertDoesNotThrow(() -> {
+      DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
+          0, DiskChecker.DISK_IO_MAX_ITERATIONS, true));
+      checkDirs(true);
+    });
+  }
+
   /**
    * A dummy {@link DiskChecker#FileIoProvider} that can throw a programmable
    * number of times.
    */
@@ -114,11 +127,14 @@ private static class TestFileIoProvider implements FileIoProvider {
     private final int numTimesToThrowOnCreate;
     private final int numTimesToThrowOnWrite;
+    private final boolean throwENOSPCError;
 
     public TestFileIoProvider(
-        int numTimesToThrowOnCreate, int numTimesToThrowOnWrite) {
+        int numTimesToThrowOnCreate, int numTimesToThrowOnWrite,
+        boolean throwENOSPCError) {
       this.numTimesToThrowOnCreate = numTimesToThrowOnCreate;
       this.numTimesToThrowOnWrite = numTimesToThrowOnWrite;
+      this.throwENOSPCError = throwENOSPCError;
     }
 
     /**
@@ -139,7 +155,11 @@ public FileOutputStream get(File f) throws FileNotFoundException {
     @Override
     public void write(FileOutputStream fos, byte[] data) throws IOException {
       if (numWriteCalls.getAndIncrement() < numTimesToThrowOnWrite) {
-        throw new IOException("Dummy exception for testing");
+        if (throwENOSPCError) {
+          throw new IOException("No space left on device");
+        } else {
+          throw new IOException("Dummy exception for testing");
+        }
       }
       fos.write(data);
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index c72bc50c0fb76..49e36713ea788 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -190,6 +190,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long
       DFS_DN_CACHED_DFSUSED_CHECK_INTERVAL_DEFAULT_MS = 600000;
 
+  public static final String DFS_DATANODE_CHECK_DIR_WITH_DISKIO =
+      "dfs.datanode.check.dir.with.diskio";
+  public static final boolean DFS_DATANODE_CHECK_DIR_WITH_DISKIO_DEFAULT = false;
+
   public static final String DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT =
       "dfs.namenode.path.based.cache.block.map.allocation.percent";
   public static final float DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
index 8c643e9e16ace..246581fbfb86c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
@@ -120,6 +120,7 @@ public class BlockPoolSlice {
   private final long cachedDfsUsedCheckTime;
   private final Timer timer;
   private final int maxDataLength;
+  private final boolean checkDirWithDiskIo;
   private final FileIoProvider fileIoProvider;
   private final Configuration config;
   private final File bpDir;
@@ -179,6 +180,10 @@ public int compare(File f1, File f2) {
         CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH,
         CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT);
 
+    this.checkDirWithDiskIo = conf.getBoolean(
+        DFSConfigKeys.DFS_DATANODE_CHECK_DIR_WITH_DISKIO,
+        DFSConfigKeys.DFS_DATANODE_CHECK_DIR_WITH_DISKIO_DEFAULT);
+
     this.timer = timer;
 
     // Files that were being written when the datanode was last shutdown
@@ -484,9 +489,15 @@ ReplicaInfo activateSavedReplica(ReplicaInfo replicaInfo,
   }
 
   void checkDirs() throws DiskErrorException {
-    DiskChecker.checkDir(finalizedDir);
-    DiskChecker.checkDir(tmpDir);
-    DiskChecker.checkDir(rbwDir);
+    if (checkDirWithDiskIo) {
+      DiskChecker.checkDirWithDiskIo(finalizedDir);
+      DiskChecker.checkDirWithDiskIo(tmpDir);
+      DiskChecker.checkDirWithDiskIo(rbwDir);
+    } else {
+      DiskChecker.checkDir(finalizedDir);
+      DiskChecker.checkDir(tmpDir);
+      DiskChecker.checkDir(rbwDir);
+    }
   }
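Note on usage: the new datanode check is disabled by default
(DFS_DATANODE_CHECK_DIR_WITH_DISKIO_DEFAULT = false). The sketch below is
illustrative only and not part of the patch; the EnableDiskIoCheck class and
its main() are invented for the example, and in a real cluster the key would
normally be set in hdfs-site.xml instead.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    // Minimal sketch: enable I/O-based directory checks on the DataNode.
    public class EnableDiskIoCheck {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Key and default added by this patch; the default is false.
        conf.setBoolean(DFSConfigKeys.DFS_DATANODE_CHECK_DIR_WITH_DISKIO, true);
        // A DataNode started with this conf routes BlockPoolSlice#checkDirs()
        // through DiskChecker.checkDirWithDiskIo() for the finalized, tmp and
        // rbw directories, probing each with a real file create and write.
        System.out.println(conf.get("dfs.datanode.check.dir.with.diskio"));
      }
    }

With the flag on, a "No space left" failure during the write probe no longer
fails the volume check, per the DiskChecker change above; any other I/O error
still surfaces as a DiskErrorException.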