From d5e413dad3bc3859fa752293204a24f3e40768b8 Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Fri, 9 Apr 2021 11:44:27 +0530 Subject: [PATCH 1/5] HDFS-15961. standby namenode failed to start ordered snapshot deletion is enabled while having snapshottable directories. --- .../hdfs/server/namenode/FSNamesystem.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 7d9f78c0647fb..2d9614b4b79ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -124,7 +124,13 @@ import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.ipc.ObserverRetryOnActiveException; -import org.apache.hadoop.util.*; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.DataChecksum; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.VersionInfo; +import org.apache.hadoop.util.ExitUtil; import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.monotonicNow; @@ -8559,10 +8565,12 @@ void checkAccess(String src, FsAction mode) throws IOException { /** * Check if snapshot roots are created for all existing snapshottable * directories. Create them if not. + * Only the active NameNode needs to execute this in HA setup. */ @Override public void checkAndProvisionSnapshotTrashRoots() { - if (isSnapshotTrashRootEnabled) { + if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState() + || !haEnabled)) { try { SnapshottableDirectoryStatus[] dirStatusList = getSnapshottableDirListing(); @@ -8575,9 +8583,11 @@ public void checkAndProvisionSnapshotTrashRoots() { currDir += Path.SEPARATOR; } String trashPath = currDir + FileSystem.TRASH_PREFIX; - HdfsFileStatus fileStatus = getFileInfo(trashPath, false, false, false); + HdfsFileStatus fileStatus = + getFileInfo(trashPath, false, false, false); if (fileStatus == null) { - LOG.info("Trash doesn't exist for snapshottable directory {}. " + "Creating trash at {}", currDir, trashPath); + LOG.info("Trash doesn't exist for snapshottable directory {}. " + + "Creating trash at {}", currDir, trashPath); PermissionStatus permissionStatus = new PermissionStatus(getRemoteUser().getShortUserName(), null, SHARED_TRASH_PERMISSION); @@ -8585,9 +8595,8 @@ public void checkAndProvisionSnapshotTrashRoots() { } } } catch (IOException e) { - final String msg = - "Could not provision Trash directory for existing " - + "snapshottable directories. Exiting Namenode."; + final String msg = "Could not provision Trash directory for existing " + + "snapshottable directories. Exiting Namenode."; ExitUtil.terminate(1, msg); } From bb6dd1235c940accccb1054ed5767f1da1373634 Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Sat, 10 Apr 2021 10:38:00 +0530 Subject: [PATCH 2/5] Addressed review comments. --- .../hdfs/server/namenode/FSNamesystem.java | 11 ++++-- .../hadoop/hdfs/server/namenode/NameNode.java | 1 + .../server/namenode/ha/TestHASafeMode.java | 38 +++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 2d9614b4b79ff..746c819e14114 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -8565,12 +8565,17 @@ void checkAccess(String src, FsAction mode) throws IOException { /** * Check if snapshot roots are created for all existing snapshottable * directories. Create them if not. - * Only the active NameNode needs to execute this in HA setup. + * Only the active NameNode needs to execute this in HA setup once it is out + * of safe mode. + * + * The function gets called while exiting safe mode or post starting the + * services in Active NameNode, but comes into effect post whichever event + * happens later. */ @Override - public void checkAndProvisionSnapshotTrashRoots() { + public synchronized void checkAndProvisionSnapshotTrashRoots() { if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState() - || !haEnabled)) { + || !haEnabled) && !blockManager.isInSafeMode()) { try { SnapshottableDirectoryStatus[] dirStatusList = getSnapshottableDirListing(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 55196c4d44f03..26a2a61946d80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -2021,6 +2021,7 @@ public HAState getState() { public void startActiveServices() throws IOException { try { namesystem.startActiveServices(); + namesystem.checkAndProvisionSnapshotTrashRoots(); startTrashEmptier(getConf()); } catch (Throwable t) { doImmediateShutdown(t); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index e17bb6f53cb49..390616934f7c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -98,6 +98,7 @@ public void setupCluster() throws Exception { conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + conf.setBoolean("dfs.namenode.snapshot.trashroot.enabled", false); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) @@ -909,6 +910,43 @@ public Boolean get() { assertSafeMode(nn1, 3, 3, 3, 0); } + @Test + public void testNameNodeCreateSnapshotTrashRootOnHASetup() + throws Exception { + DistributedFileSystem dfs = cluster.getFileSystem(0); + final Path testDir = new Path("/disallowss/test2/"); + final Path file0path = new Path(testDir, "file-0"); + dfs.create(file0path).close(); + dfs.allowSnapshot(testDir); + // .Trash won't be created right now since snapshot trash is disabled + final Path trashRoot = new Path(testDir, FileSystem.TRASH_PREFIX); + assertFalse(dfs.exists(trashRoot)); + // Set dfs.namenode.snapshot.trashroot.enabled=true + cluster.getNameNode(0).getConf() + .setBoolean("dfs.namenode.snapshot.trashroot.enabled", true); + cluster.getNameNode(1).getConf() + .setBoolean("dfs.namenode.snapshot.trashroot.enabled", true); + restartActive(); + cluster.transitionToActive(1); + dfs = cluster.getFileSystem(1); + // Make sure .Trash path does not exist yet as on NN1 trash root is not + // enabled + assertFalse(dfs.exists(trashRoot)); + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + dfs = cluster.getFileSystem(0); + // Check .Trash existence, should be created now + assertTrue(dfs.exists(trashRoot)); + assertFalse(cluster.getNameNode(0).isInSafeMode()); + restartStandby(); + // Ensure Standby namenode is up and running + assertTrue(cluster.getNameNode(1).isStandbyState()); + // Cleanup + dfs.delete(trashRoot, true); + dfs.disallowSnapshot(testDir); + dfs.delete(testDir, true); + } + /** * Test transition to active when namenode in safemode. * From 1e53a4942ff8ae8055981a19d796303ad2b7a2b5 Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Sat, 10 Apr 2021 10:44:07 +0530 Subject: [PATCH 3/5] Fixed indentation. --- .../server/namenode/ha/TestHASafeMode.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 390616934f7c6..562eb198fdb8e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -911,17 +911,16 @@ public Boolean get() { } @Test - public void testNameNodeCreateSnapshotTrashRootOnHASetup() - throws Exception { - DistributedFileSystem dfs = cluster.getFileSystem(0); - final Path testDir = new Path("/disallowss/test2/"); - final Path file0path = new Path(testDir, "file-0"); - dfs.create(file0path).close(); - dfs.allowSnapshot(testDir); - // .Trash won't be created right now since snapshot trash is disabled - final Path trashRoot = new Path(testDir, FileSystem.TRASH_PREFIX); - assertFalse(dfs.exists(trashRoot)); - // Set dfs.namenode.snapshot.trashroot.enabled=true + public void testNameNodeCreateSnapshotTrashRootOnHASetup() throws Exception { + DistributedFileSystem dfs = cluster.getFileSystem(0); + final Path testDir = new Path("/disallowss/test2/"); + final Path file0path = new Path(testDir, "file-0"); + dfs.create(file0path).close(); + dfs.allowSnapshot(testDir); + // .Trash won't be created right now since snapshot trash is disabled + final Path trashRoot = new Path(testDir, FileSystem.TRASH_PREFIX); + assertFalse(dfs.exists(trashRoot)); + // Set dfs.namenode.snapshot.trashroot.enabled=true cluster.getNameNode(0).getConf() .setBoolean("dfs.namenode.snapshot.trashroot.enabled", true); cluster.getNameNode(1).getConf() From 32c8de107a1b4161bcb436a7fb0123cfa634b34f Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Tue, 20 Apr 2021 15:33:16 +0530 Subject: [PATCH 4/5] Removed the termination condition for snapshot trash root provisioning failure on NN startup. --- .../apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 746c819e14114..d486564af2a0a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -130,7 +130,6 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; -import org.apache.hadoop.util.ExitUtil; import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.util.Time.monotonicNow; @@ -8600,11 +8599,9 @@ public synchronized void checkAndProvisionSnapshotTrashRoots() { } } } catch (IOException e) { - final String msg = "Could not provision Trash directory for existing " - + "snapshottable directories. Exiting Namenode."; - ExitUtil.terminate(1, msg); + LOG.error("Could not provision Trash directory for existing " + + "snapshottable directory", e); } - } } From aaeeec00a6e6a3e1829c5715668f9fcc054c2d6e Mon Sep 17 00:00:00 2001 From: Shashikant Banerjee Date: Fri, 23 Apr 2021 09:39:29 +0530 Subject: [PATCH 5/5] Changed exception log msg. --- .../hadoop/hdfs/server/namenode/FSNamesystem.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index d486564af2a0a..b05ff81f070d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -8575,13 +8575,15 @@ void checkAccess(String src, FsAction mode) throws IOException { public synchronized void checkAndProvisionSnapshotTrashRoots() { if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState() || !haEnabled) && !blockManager.isInSafeMode()) { + SnapshottableDirectoryStatus dirStatus = null; try { SnapshottableDirectoryStatus[] dirStatusList = getSnapshottableDirListing(); if (dirStatusList == null) { return; } - for (SnapshottableDirectoryStatus dirStatus : dirStatusList) { + for (SnapshottableDirectoryStatus status : dirStatusList) { + dirStatus = status; String currDir = dirStatus.getFullPath().toString(); if (!currDir.endsWith(Path.SEPARATOR)) { currDir += Path.SEPARATOR; @@ -8599,8 +8601,12 @@ public synchronized void checkAndProvisionSnapshotTrashRoots() { } } } catch (IOException e) { - LOG.error("Could not provision Trash directory for existing " - + "snapshottable directory", e); + if (dirStatus == null) { + LOG.error("Failed to get snapshottable directory list", e); + } else { + LOG.error("Could not provision Trash directory for existing " + + "snapshottable directory {}", dirStatus, e); + } } } }