From b125f2f2761ae81abd2c2c049666fc1b0ca51e0c Mon Sep 17 00:00:00 2001
From: Imran Rashid
Date: Mon, 26 Sep 2016 21:56:00 -0500
Subject: [PATCH 1/3] [SPARK-17676][CORE] FsHistoryProvider should ignore hidden files

FsHistoryProvider was writing a hidden file (to check the fs's clock).
Even though it deleted the file immediately, sometimes another thread
would try to scan the files on the fs in-between, and then there would
be an error msg logged which was very misleading for the end-user.
(The logged error was harmless, though.)
---
 .../deploy/history/FsHistoryProvider.scala    |  4 ++-
 .../history/FsHistoryProviderSuite.scala      | 27 +++++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 6874aa5f938a..2a3955575d77 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -290,7 +290,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       .filter { entry =>
         try {
           val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
-          !entry.isDirectory() && prevFileSize < entry.getLen()
+          !entry.isDirectory() &&
+            !entry.getPath().getName().startsWith(".") &&
+            prevFileSize < entry.getLen()
         } catch {
           case e: AccessControlException =>
             // Do not use "logInfo" since these messages can get pretty noisy if printed on
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 39c5857b1345..296804f8fda3 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -17,8 +17,7 @@
 
 package org.apache.spark.deploy.history
 
-import java.io.{BufferedOutputStream, ByteArrayInputStream, ByteArrayOutputStream, File,
-  FileOutputStream, OutputStreamWriter}
+import java.io._
 import java.net.URI
 import java.nio.charset.StandardCharsets
 import java.util.concurrent.TimeUnit
@@ -394,6 +393,30 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     }
   }
 
+  test("ignore hidden files") {
+    // FsHistoryProvider should ignore hidden files. (It even writes out a hidden file itself
+    // that should be ignored). Note that this test really *should* also check that no errors are
+    // logged, but that part has to be manual for now.
+    val hiddenFile = new File(testDir, ".garbage")
+    val out = new PrintWriter(hiddenFile)
+    // scalastyle:off println
+    out.println("GARBAGE")
+    // scalastyle:on println
+    out.close()
+
+    val newAppComplete = newLogFile("new1", None, inProgress = false)
+    writeFile(newAppComplete, true, None,
+      SparkListenerApplicationStart(newAppComplete.getName(), Some("new-app-complete"), 1L, "test",
+        None),
+      SparkListenerApplicationEnd(5L)
+      )
+
+    val provider = new FsHistoryProvider(createTestConf())
+    updateAndCheck(provider) { list =>
+      list.size should be (1)
+    }
+  }
+
   /**
    * Asks the provider to check for logs and calls a function to perform checks on the updated
    * app list.
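The substance of the patch above is the extra startsWith(".") clause in the listing filter.
As a rough, standalone Scala sketch of that predicate, assuming only hadoop-common on the
classpath (HiddenFileFilterSketch, shouldScan and prevSizes are illustrative names here, not
Spark APIs):

    import org.apache.hadoop.fs.{FileStatus, Path}

    object HiddenFileFilterSketch {
      // prevSizes stands in for fileToAppInfo: the size each log file had when last scanned.
      def shouldScan(entry: FileStatus, prevSizes: Map[Path, Long]): Boolean = {
        val prevFileSize = prevSizes.getOrElse(entry.getPath(), 0L)
        !entry.isDirectory() &&
          !entry.getPath().getName().startsWith(".") && // skip hidden files such as ".garbage"
          prevFileSize < entry.getLen()                 // only re-read files that have grown
      }
    }

With a predicate like this, a listing that contains ".garbage" or the provider's own temporary
hidden clock-check file is silently skipped instead of triggering a replay attempt and a
confusing error in the logs.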
From 9c9012e0965e994eda18fe783e36aef5a7b348fb Mon Sep 17 00:00:00 2001
From: Imran Rashid
Date: Tue, 27 Sep 2016 14:35:57 -0500
Subject: [PATCH 2/3] add comment

---
 .../org/apache/spark/deploy/history/FsHistoryProvider.scala | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 2a3955575d77..dc7a6ae42d1b 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -291,6 +291,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         try {
           val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
           !entry.isDirectory() &&
+            // FsHistoryProvider generates a hidden file which can't be read, and the user may
+            // generate others which can't be read. Accidentally reading a garbage file is safe,
+            // but we would log an error which can be scary to the end-user.
             !entry.getPath().getName().startsWith(".") &&
             prevFileSize < entry.getLen()
         } catch {

From 2d37bd33235f673392b17613b7615edf6a0a0604 Mon Sep 17 00:00:00 2001
From: Imran Rashid
Date: Thu, 29 Sep 2016 15:02:05 -0500
Subject: [PATCH 3/3] review feedback

---
 .../deploy/history/FsHistoryProvider.scala    |  6 +++---
 .../history/FsHistoryProviderSuite.scala      | 19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index dc7a6ae42d1b..0d2610f27e9a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -291,9 +291,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         try {
           val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
           !entry.isDirectory() &&
-            // FsHistoryProvider generates a hidden file which can't be read, and the user may
-            // generate others which can't be read. Accidentally reading a garbage file is safe,
-            // but we would log an error which can be scary to the end-user.
+            // FsHistoryProvider generates a hidden file which can't be read. Accidentally
+            // reading a garbage file is safe, but we would log an error which can be scary to
+            // the end-user.
             !entry.getPath().getName().startsWith(".") &&
             prevFileSize < entry.getLen()
         } catch {
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 296804f8fda3..01bef0a11c12 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -394,17 +394,25 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
   }
 
   test("ignore hidden files") {
+
     // FsHistoryProvider should ignore hidden files. (It even writes out a hidden file itself
-    // that should be ignored). Note that this test really *should* also check that no errors are
-    // logged, but that part has to be manual for now.
-    val hiddenFile = new File(testDir, ".garbage")
-    val out = new PrintWriter(hiddenFile)
+    // that should be ignored).
+
+    // write out one totally bogus hidden file
+    val hiddenGarbageFile = new File(testDir, ".garbage")
+    val out = new PrintWriter(hiddenGarbageFile)
     // scalastyle:off println
     out.println("GARBAGE")
     // scalastyle:on println
     out.close()
 
-    val newAppComplete = newLogFile("new1", None, inProgress = false)
+    // also write out one real event log file, but since its a hidden file, we shouldn't read it
+    val tmpNewAppFile = newLogFile("hidden", None, inProgress = false)
+    val hiddenNewAppFile = new File(tmpNewAppFile.getParentFile, "." + tmpNewAppFile.getName)
+    tmpNewAppFile.renameTo(hiddenNewAppFile)
+
+    // and write one real file, which should still get picked up just fine
+    val newAppComplete = newLogFile("real-app", None, inProgress = false)
     writeFile(newAppComplete, true, None,
       SparkListenerApplicationStart(newAppComplete.getName(), Some("new-app-complete"), 1L, "test",
         None),
       SparkListenerApplicationEnd(5L)
       )
@@ -414,6 +422,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
 
     val provider = new FsHistoryProvider(createTestConf())
     updateAndCheck(provider) { list =>
       list.size should be (1)
+      list(0).name should be ("real-app")
     }
   }
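The review-feedback commit also strengthens the test by hiding an otherwise valid event log
behind a leading dot, so the provider must skip it even though its contents are readable.
A rough sketch of that rename trick in isolation (HideFileSketch and hide are made-up names
for illustration, not part of the suite):

    import java.io.{File, IOException}

    object HideFileSketch {
      // Rename a file to a dot-prefixed sibling (e.g. "events-123" -> ".events-123") so that a
      // listing filter which skips names starting with "." will no longer pick it up.
      def hide(f: File): File = {
        val hidden = new File(f.getParentFile, "." + f.getName)
        if (!f.renameTo(hidden)) {
          throw new IOException(s"could not rename $f to $hidden")
        }
        hidden
      }
    }

The test itself does not check the result of renameTo; the assertion that matters is that the
provider ends up listing exactly one application, "real-app".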