From 65c7c6696ab3d6a9f69c235b915d4bf30d190b52 Mon Sep 17 00:00:00 2001 From: Oleksii Dymytrov Date: Tue, 22 Nov 2016 17:33:54 +0200 Subject: [PATCH 1/3] YARN-5924 - Resource Manager fails to load state with InvalidProtocolBufferException --- .../recovery/FileSystemRMStateStore.java | 33 +++++++++++++------ .../recovery/TestFSRMStateStore.java | 32 ++++++++++++++++++ 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index b9a43749294ff..9fa59ac21ef57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ -29,6 +29,7 @@ import java.util.HashMap; import java.util.List; +import com.google.protobuf.InvalidProtocolBufferException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -298,17 +299,29 @@ private void processDirectoriesOfFiles( String dirName = dir.getPath().getName(); for (FileStatus fileNodeStatus : listStatusWithRetries(dir.getPath())) { assert fileNodeStatus.isFile(); - String fileName = fileNodeStatus.getPath().getName(); - if (checkAndRemovePartialRecordWithRetries(fileNodeStatus.getPath())) { - continue; + try { + String fileName = fileNodeStatus.getPath().getName(); + if (checkAndRemovePartialRecordWithRetries(fileNodeStatus.getPath())) { + continue; + } + byte[] fileData = readFileWithRetries(fileNodeStatus.getPath(), + fileNodeStatus.getLen()); + // Set attribute if not already set + setUnreadableBySuperuserXattrib(fileNodeStatus.getPath()); + + rmAppStateFileProcessor.processChildNode(dirName, fileName, + fileData); + } catch (InvalidProtocolBufferException ex) { + LOG.warn("Removing broken " + dir.getPath() + " app's directory as its data is invalid, to prevent RM restart failure.", ex); + // removing directory with broken data + if (existsWithRetries(dir.getPath())) { + deleteFileWithRetries(dir.getPath()); + } + break; + } catch (Exception ex) { + LOG.warn("Skipping state loading for " + dir.getPath() + " app's directory because of exception to prevent RM restart failure", ex); + break; } - byte[] fileData = readFileWithRetries(fileNodeStatus.getPath(), - fileNodeStatus.getLen()); - // Set attribute if not already set - setUnreadableBySuperuserXattrib(fileNodeStatus.getPath()); - - rmAppStateFileProcessor.processChildNode(dirName, fileName, - fileData); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java index 6f0d53f243dfe..f8d8528137422 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java @@ -207,6 +207,38 @@ public void testFSRMStateStore() throws Exception { } } + @Test(timeout = 60000) + public void testInvalidAppDataLoad() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + try { + fsTester = new TestFSRMStateStoreTester(cluster, false); + // If the state store is FileSystemRMStateStore then add invalid application data. + // It should discard the entry and remove application with broken data from the file system. + FileSystemRMStateStore fileSystemRMStateStore = + (FileSystemRMStateStore) fsTester.getRMStateStore(); + String appIdStr = "application_1477986176766_0134"; + ApplicationId appId = + ApplicationId.fromString(appIdStr); + Path appDir = + fsTester.store.getAppDir(appId.toString()); + Path tempAppFile = + new Path(appDir, appId.toString()); + + // write invalid data + try (FSDataOutputStream fsOut = fileSystemRMStateStore.fs.create(tempAppFile, false)) { + fsOut.write("\\00\\00\\00\\00\\00".getBytes()); + } + + Assert.assertTrue(fileSystemRMStateStore.fs.exists(tempAppFile)); + fsTester.getRMStateStore().loadState(); + Assert.assertFalse(fileSystemRMStateStore.fs.exists(tempAppFile)); + } finally { + cluster.shutdown(); + } + } + @Test(timeout = 60000) public void testHDFSRMStateStore() throws Exception { final HdfsConfiguration conf = new HdfsConfiguration(); From 40d9be91614bfcc4b9204199fc91ee4b64a68c07 Mon Sep 17 00:00:00 2001 From: Oleksii Dymytrov Date: Thu, 24 Nov 2016 15:31:18 +0200 Subject: [PATCH 2/3] YARN-5924 - Resource Manager fails to load state with InvalidProtocolBufferException: fixed checkstyle and whitespace warnings --- .../recovery/FileSystemRMStateStore.java | 10 +++++++--- .../resourcemanager/recovery/TestFSRMStateStore.java | 11 +++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index 9fa59ac21ef57..bc8cb630b7462 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ -301,7 +301,8 @@ private void processDirectoriesOfFiles( assert fileNodeStatus.isFile(); try { String fileName = fileNodeStatus.getPath().getName(); - if (checkAndRemovePartialRecordWithRetries(fileNodeStatus.getPath())) { + if (checkAndRemovePartialRecordWithRetries( + fileNodeStatus.getPath())) { continue; } byte[] fileData = readFileWithRetries(fileNodeStatus.getPath(), @@ -312,14 +313,17 @@ private void processDirectoriesOfFiles( rmAppStateFileProcessor.processChildNode(dirName, fileName, fileData); } catch (InvalidProtocolBufferException ex) { - LOG.warn("Removing broken " + dir.getPath() + " app's directory as its data is invalid, to prevent RM restart failure.", ex); + LOG.warn("Removing broken " + dir.getPath() + " app's directory " + + "as its data is invalid, to prevent RM restart failure.", ex); // removing directory with broken data if (existsWithRetries(dir.getPath())) { deleteFileWithRetries(dir.getPath()); } break; } catch (Exception ex) { - LOG.warn("Skipping state loading for " + dir.getPath() + " app's directory because of exception to prevent RM restart failure", ex); + LOG.warn("Skipping state loading for " + dir.getPath() + + " app's directory because of exception" + + " to prevent RM restart failure", ex); break; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java index f8d8528137422..642430637c16d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java @@ -214,8 +214,10 @@ public void testInvalidAppDataLoad() throws Exception { new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); try { fsTester = new TestFSRMStateStoreTester(cluster, false); - // If the state store is FileSystemRMStateStore then add invalid application data. - // It should discard the entry and remove application with broken data from the file system. + // If the state store is FileSystemRMStateStore + // then add invalid application data. + // It should discard the entry and remove application + // with broken data from the file system. FileSystemRMStateStore fileSystemRMStateStore = (FileSystemRMStateStore) fsTester.getRMStateStore(); String appIdStr = "application_1477986176766_0134"; @@ -226,8 +228,9 @@ public void testInvalidAppDataLoad() throws Exception { Path tempAppFile = new Path(appDir, appId.toString()); - // write invalid data - try (FSDataOutputStream fsOut = fileSystemRMStateStore.fs.create(tempAppFile, false)) { + // write invalid data + try (FSDataOutputStream fsOut = + fileSystemRMStateStore.fs.create(tempAppFile, false)) { fsOut.write("\\00\\00\\00\\00\\00".getBytes()); } From 4b34c5111edd7cb82c7bf8a9da08cc957fc07a01 Mon Sep 17 00:00:00 2001 From: Oleksii Dymytrov Date: Mon, 20 Mar 2017 15:06:53 +0200 Subject: [PATCH 3/3] YARN-5924: fixed whitespace issue --- .../resourcemanager/recovery/FileSystemRMStateStore.java | 2 +- .../resourcemanager/recovery/TestFSRMStateStore.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index bc8cb630b7462..34b94d6556fb4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ -321,7 +321,7 @@ private void processDirectoriesOfFiles( } break; } catch (Exception ex) { - LOG.warn("Skipping state loading for " + dir.getPath() + + LOG.warn("Skipping state loading for " + dir.getPath() + " app's directory because of exception" + " to prevent RM restart failure", ex); break; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java index 642430637c16d..582629851a029 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java @@ -214,9 +214,9 @@ public void testInvalidAppDataLoad() throws Exception { new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); try { fsTester = new TestFSRMStateStoreTester(cluster, false); - // If the state store is FileSystemRMStateStore + // If the state store is FileSystemRMStateStore // then add invalid application data. - // It should discard the entry and remove application + // It should discard the entry and remove application // with broken data from the file system. FileSystemRMStateStore fileSystemRMStateStore = (FileSystemRMStateStore) fsTester.getRMStateStore(); @@ -227,9 +227,9 @@ public void testInvalidAppDataLoad() throws Exception { fsTester.store.getAppDir(appId.toString()); Path tempAppFile = new Path(appDir, appId.toString()); - + // write invalid data - try (FSDataOutputStream fsOut = + try (FSDataOutputStream fsOut = fileSystemRMStateStore.fs.create(tempAppFile, false)) { fsOut.write("\\00\\00\\00\\00\\00".getBytes()); }