From 16b7b400b57f0ac1a783a68e6219b0e520d7802f Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Mon, 6 Aug 2018 14:29:05 -0700
Subject: [PATCH 1/4] [SPARK-24948][SHS] Delegate access permission checks to
 the file system

In `SparkHadoopUtil.checkAccessPermission`, only the basic POSIX
permissions are considered when deciding whether a user can access a
file. That check is incomplete: it ignores ACLs and any other policies
a file system may apply internally, so it can wrongly report that a
user cannot access a file even though they actually can. This PR
delegates the check of whether a file is accessible to the file system
itself, so the correct result is returned. A caching layer is added for
performance reasons.

Tested with modified UTs.

Author: Marco Gaido

Closes #21895 from mgaido91/SPARK-24948.
---
 .../apache/spark/deploy/SparkHadoopUtil.scala |  22 -----
 .../deploy/history/FsHistoryProvider.scala    |  45 +++++++--
 .../spark/deploy/SparkHadoopUtilSuite.scala   |  97 ------------------
 .../history/FsHistoryProviderSuite.scala      |  44 ++++++++-
 4 files changed, 78 insertions(+), 130 deletions(-)
 delete mode 100644 core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 550bd6854f67e..4600f7793dede 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -29,7 +29,6 @@ import scala.util.control.NonFatal
 import com.google.common.primitives.Longs
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter}
-import org.apache.hadoop.fs.permission.FsAction
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.security.token.{Token, TokenIdentifier}
@@ -379,27 +378,6 @@ class SparkHadoopUtil extends Logging {
     buffer.toString
   }
 
-  private[spark] def checkAccessPermission(status: FileStatus, mode: FsAction): Boolean = {
-    val perm = status.getPermission
-    val ugi = UserGroupInformation.getCurrentUser
-
-    if (ugi.getShortUserName == status.getOwner) {
-      if (perm.getUserAction.implies(mode)) {
-        return true
-      }
-    } else if (ugi.getGroupNames.contains(status.getGroup)) {
-      if (perm.getGroupAction.implies(mode)) {
-        return true
-      }
-    } else if (perm.getOtherAction.implies(mode)) {
-      return true
-    }
-
-    logDebug(s"Permission denied: user=${ugi.getShortUserName}, " +
-      s"path=${status.getPath}:${status.getOwner}:${status.getGroup}" +
-      s"${if (status.isDirectory) "d" else "-"}$perm")
-    false
-  }
 }
 
 object SparkHadoopUtil {
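For reference, the method removed above only compared the file's owner/group/other bits against the current UGI. A minimal sketch of what asking the file system directly can look like, using Hadoop's `FileSystem.access` API (available since Hadoop 2.6); `canRead` is a hypothetical helper and not part of this patch, which instead attempts the read and reacts to the resulting `AccessControlException`, as the hunks below show:

    import org.apache.hadoop.fs.{FileSystem, Path}
    import org.apache.hadoop.fs.permission.FsAction
    import org.apache.hadoop.security.AccessControlException

    // Delegate the decision to the file system itself: this honors ACLs and
    // any authorization policy the file system applies internally, which a
    // manual owner/group/other comparison cannot see.
    def canRead(fs: FileSystem, path: Path): Boolean = {
      try {
        fs.access(path, FsAction.READ) // throws AccessControlException when denied
        true
      } catch {
        case _: AccessControlException => false
      }
    }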
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index f4235df245128..38f2491ac3928 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -19,15 +19,17 @@ package org.apache.spark.deploy.history
 
 import java.io.{FileNotFoundException, IOException, OutputStream}
 import java.util.UUID
-import java.util.concurrent.{Executors, ExecutorService, Future, TimeUnit}
+import java.util.concurrent.{ConcurrentHashMap, Executors, ExecutorService, Future, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
 import scala.collection.mutable
+import scala.concurrent.ExecutionException
+import scala.util.Try
 import scala.xml.Node
 
 import com.google.common.io.ByteStreams
 import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder}
-import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
 import org.apache.hadoop.fs.permission.FsAction
 import org.apache.hadoop.hdfs.DistributedFileSystem
 import org.apache.hadoop.hdfs.protocol.HdfsConstants
@@ -105,7 +107,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     "; groups with admin permissions" + HISTORY_UI_ADMIN_ACLS_GROUPS.toString)
 
   private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
-  private val fs = new Path(logDir).getFileSystem(hadoopConf)
+  // Visible for testing
+  private[history] val fs: FileSystem = new Path(logDir).getFileSystem(hadoopConf)
 
   // Used by check event thread and clean log thread.
   // Scheduled thread pool size must be one, otherwise it will have concurrent issues about fs
@@ -129,6 +132,25 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   private val pendingReplayTasksCount = new java.util.concurrent.atomic.AtomicInteger(0)
 
+  private val blacklist = new ConcurrentHashMap[String, Long]
+
+  // Visible for testing
+  private[history] def isBlacklisted(path: Path): Boolean = {
+    blacklist.containsKey(path.getName)
+  }
+
+  private def blacklist(path: Path): Unit = {
+    blacklist.put(path.getName, clock.getTimeMillis())
+  }
+
+  /**
+   * Removes expired entries in the blacklist, according to the provided `expireTimeInSeconds`.
+   */
+  private def clearBlacklist(expireTimeInSeconds: Long): Unit = {
+    val expiredThreshold = clock.getTimeMillis() - expireTimeInSeconds * 1000
+    blacklist.asScala.retain((_, creationTime) => creationTime >= expiredThreshold)
+  }
+
   /**
    * Return a runnable that performs the given operation on the event logs.
    * This operation is expected to be executed periodically.
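The blacklist introduced above is essentially a time-expiring set: each entry records its insertion time and is dropped once older than a threshold. A standalone sketch of the same idea (the `ExpiringSet` name and TTL wiring are illustrative, not Spark API):

    import java.util.concurrent.ConcurrentHashMap

    import scala.collection.JavaConverters._

    // A minimal time-expiring set of keys: add() stamps the insertion time,
    // evictExpired() drops every entry older than the TTL.
    class ExpiringSet(ttlMillis: Long) {
      private val entries = new ConcurrentHashMap[String, Long]

      def add(key: String): Unit = entries.put(key, System.currentTimeMillis())

      def contains(key: String): Boolean = entries.containsKey(key)

      def evictExpired(): Unit = {
        val threshold = System.currentTimeMillis() - ttlMillis
        entries.asScala.retain((_, insertedAt) => insertedAt >= threshold)
      }
    }

Note that the provider keys its map by `path.getName` rather than by the full path, which assumes event log file names are unique within the log directory.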
@@ -326,7 +348,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
           // the end-user.
           !entry.getPath().getName().startsWith(".") &&
           prevFileSize < entry.getLen() &&
-          SparkHadoopUtil.get.checkAccessPermission(entry, FsAction.READ)
+          !isBlacklisted(entry.getPath)
         }
         .flatMap { entry => Some(entry) }
         .sortWith { case (entry1, entry2) =>
@@ -337,13 +359,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         logDebug(s"New/updated attempts found: ${logInfos.size} ${logInfos.map(_.getPath)}")
       }
 
-      var tasks = mutable.ListBuffer[Future[_]]()
+      var tasks = mutable.ListBuffer[(Future[Unit], Path)]()
 
       try {
         for (file <- logInfos) {
-          tasks += replayExecutor.submit(new Runnable {
+          val task: Future[Unit] = replayExecutor.submit(new Runnable {
             override def run(): Unit = mergeApplicationListing(file)
-          })
+          }, Unit)
+          tasks += (task -> file.getPath)
         }
       } catch {
         // let the iteration over logInfos break, since an exception on
@@ -356,7 +379,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
       pendingReplayTasksCount.addAndGet(tasks.size)
 
-      tasks.foreach { task =>
+      tasks.foreach { case (task, path) =>
         try {
           // Wait for all tasks to finish. This makes sure that checkForLogs
           // is not scheduled again while some tasks are already running in
@@ -365,6 +388,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         } catch {
           case e: InterruptedException =>
             throw e
+          case e: ExecutionException if e.getCause.isInstanceOf[AccessControlException] =>
+            // We don't have read permissions on the log file
+            logWarning(s"Unable to read log $path", e.getCause)
+            blacklist(path)
           case e: Exception =>
             logError("Exception while merging application listings", e)
         } finally {
@@ -587,6 +614,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     } catch {
       case t: Exception => logError("Exception in cleaning logs", t)
     }
+    // Clean the blacklist from the expired entries.
+    clearBlacklist(CLEAN_INTERVAL_S)
   }
 
   /**
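Because `mergeApplicationListing` runs on `replayExecutor`, an `AccessControlException` thrown inside the task surfaces wrapped in an `ExecutionException` when the future is awaited, which is why the new catch clause inspects `e.getCause`. A minimal, self-contained illustration of that wrapping (the stray `, Unit)` argument, which patch 4 later removes, passes the `Unit` companion object as the future's result; this sketch passes the unit value `()` instead):

    import java.util.concurrent.{ExecutionException, Executors}

    import org.apache.hadoop.security.AccessControlException

    object ExecutionExceptionExample {
      def main(args: Array[String]): Unit = {
        val pool = Executors.newSingleThreadExecutor()
        // submit(runnable, result) returns a Future that yields `result` on
        // success; here the Runnable always fails instead.
        val task = pool.submit(new Runnable {
          override def run(): Unit = throw new AccessControlException("denied")
        }, ())
        try {
          task.get() // the Runnable's exception arrives wrapped in ExecutionException
        } catch {
          case e: ExecutionException if e.getCause.isInstanceOf[AccessControlException] =>
            println(s"no read permission: ${e.getCause.getMessage}")
        } finally {
          pool.shutdown()
        }
      }
    }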
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
deleted file mode 100644
index ab24a76e20a30..0000000000000
--- a/core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.deploy
-
-import java.security.PrivilegedExceptionAction
-
-import scala.util.Random
-
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.fs.permission.{FsAction, FsPermission}
-import org.apache.hadoop.security.UserGroupInformation
-import org.scalatest.Matchers
-
-import org.apache.spark.SparkFunSuite
-
-class SparkHadoopUtilSuite extends SparkFunSuite with Matchers {
-  test("check file permission") {
-    import FsAction._
-    val testUser = s"user-${Random.nextInt(100)}"
-    val testGroups = Array(s"group-${Random.nextInt(100)}")
-    val testUgi = UserGroupInformation.createUserForTesting(testUser, testGroups)
-
-    testUgi.doAs(new PrivilegedExceptionAction[Void] {
-      override def run(): Void = {
-        val sparkHadoopUtil = new SparkHadoopUtil
-
-        // If file is owned by user and user has access permission
-        var status = fileStatus(testUser, testGroups.head, READ_WRITE, READ_WRITE, NONE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)
-
-        // If file is owned by user but user has no access permission
-        status = fileStatus(testUser, testGroups.head, NONE, READ_WRITE, NONE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)
-
-        val otherUser = s"test-${Random.nextInt(100)}"
-        val otherGroup = s"test-${Random.nextInt(100)}"
-
-        // If file is owned by user's group and user's group has access permission
-        status = fileStatus(otherUser, testGroups.head, NONE, READ_WRITE, NONE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)
-
-        // If file is owned by user's group but user's group has no access permission
-        status = fileStatus(otherUser, testGroups.head, READ_WRITE, NONE, NONE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)
-
-        // If file is owned by other user and this user has access permission
-        status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, READ_WRITE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)
-
-        // If file is owned by other user but this user has no access permission
-        status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, NONE)
-        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
-        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)
-
-        null
-      }
-    })
-  }
-
-  private def fileStatus(
-      owner: String,
-      group: String,
-      userAction: FsAction,
-      groupAction: FsAction,
-      otherAction: FsAction): FileStatus = {
-    new FileStatus(0L,
-      false,
-      0,
-      0L,
-      0L,
-      0L,
-      new FsPermission(userAction, groupAction, otherAction),
-      owner,
-      group,
-      null)
-  }
-}
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 456158d41b93f..6005deb3fbf04 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -27,11 +27,13 @@ import scala.concurrent.duration._
 import scala.language.postfixOps
 
 import com.google.common.io.{ByteStreams, Files}
-import org.apache.hadoop.fs.FileStatus
+import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.hdfs.DistributedFileSystem
+import org.apache.hadoop.security.AccessControlException
 import org.json4s.jackson.JsonMethods._
-import org.mockito.Matchers.any
-import org.mockito.Mockito.{mock, spy, verify}
+import org.mockito.ArgumentMatcher
+import org.mockito.Matchers.{any, argThat}
+import org.mockito.Mockito.{doThrow, mock, spy, verify, when}
 import org.scalatest.BeforeAndAfter
 import org.scalatest.Matchers
 import org.scalatest.concurrent.Eventually._
@@ -583,6 +585,42 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     }
   }
 
+  test("SPARK-24948: blacklist files we don't have read permission on") {
+    val clock = new ManualClock(1533132471)
+    val provider = new FsHistoryProvider(createTestConf(), clock)
+    val accessDenied = newLogFile("accessDenied", None, inProgress = false)
+    writeFile(accessDenied, true, None,
+      SparkListenerApplicationStart("accessDenied", Some("accessDenied"), 1L, "test", None))
+    val accessGranted = newLogFile("accessGranted", None, inProgress = false)
+    writeFile(accessGranted, true, None,
+      SparkListenerApplicationStart("accessGranted", Some("accessGranted"), 1L, "test", None),
+      SparkListenerApplicationEnd(5L))
+    val mockedFs = spy(provider.fs)
+    doThrow(new AccessControlException("Cannot read accessDenied file")).when(mockedFs).open(
+      argThat(new ArgumentMatcher[Path]() {
+        override def matches(path: Any): Boolean = {
+          path.asInstanceOf[Path].getName.toLowerCase == "accessdenied"
+        }
+      }))
+    val mockedProvider = spy(provider)
+    when(mockedProvider.fs).thenReturn(mockedFs)
+    updateAndCheck(mockedProvider) { list =>
+      list.size should be(1)
+    }
+    writeFile(accessDenied, true, None,
+      SparkListenerApplicationStart("accessDenied", Some("accessDenied"), 1L, "test", None),
+      SparkListenerApplicationEnd(5L))
+    // Doing 2 times in order to check the blacklist filter too
+    updateAndCheck(mockedProvider) { list =>
+      list.size should be(1)
+    }
+    val accessDeniedPath = new Path(accessDenied.getPath)
+    assert(mockedProvider.isBlacklisted(accessDeniedPath))
+    clock.advance(24 * 60 * 60 * 1000 + 1) // add a bit more than 1d
+    mockedProvider.cleanLogs()
+    assert(!mockedProvider.isBlacklisted(accessDeniedPath))
+  }
+
   /**
    * Asks the provider to check for logs and calls a function to perform checks on the updated
    * app list. Example:
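The new test simulates the permission failure by spying on the real `FileSystem` and stubbing only the `open` calls whose path matches the denied file; because `fs` was widened to `private[history]` earlier in this patch, the test can then inject the spy via `when(mockedProvider.fs).thenReturn(mockedFs)`. A condensed sketch of that stubbing pattern (Mockito 1.x, as the `org.mockito.Matchers` import indicates; `denyAccessTo` is a hypothetical helper):

    import org.apache.hadoop.fs.{FileSystem, Path}
    import org.apache.hadoop.security.AccessControlException
    import org.mockito.ArgumentMatcher
    import org.mockito.Matchers.argThat
    import org.mockito.Mockito.{doThrow, spy}

    object SpyStubbingSketch {
      // Wrap a real FileSystem in a spy and make open() fail with an
      // AccessControlException only for the given file name; every other
      // call falls through to the real implementation.
      def denyAccessTo(realFs: FileSystem, deniedName: String): FileSystem = {
        val mockedFs = spy(realFs)
        doThrow(new AccessControlException(s"Cannot read $deniedName"))
          .when(mockedFs)
          .open(argThat(new ArgumentMatcher[Path]() {
            override def matches(path: Any): Boolean =
              path.asInstanceOf[Path].getName.equalsIgnoreCase(deniedName)
          }))
        mockedFs
      }
    }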
From 657d3643e63d79095c47b45ce14429e9fa08f25b Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Tue, 7 Aug 2018 11:15:46 +0200
Subject: [PATCH 2/4] fix build error

---
 .../org/apache/spark/deploy/history/FsHistoryProvider.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 38f2491ac3928..a04fb8d600e18 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -22,15 +22,14 @@ import java.util.UUID
 import java.util.concurrent.{ConcurrentHashMap, Executors, ExecutorService, Future, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
+import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.concurrent.ExecutionException
-import scala.util.Try
 import scala.xml.Node
 
 import com.google.common.io.ByteStreams
 import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder}
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
-import org.apache.hadoop.fs.permission.FsAction
 import org.apache.hadoop.hdfs.DistributedFileSystem
 import org.apache.hadoop.hdfs.protocol.HdfsConstants
 import org.apache.hadoop.security.AccessControlException
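The build error fixed by this commit comes from `clearBlacklist` calling `.asScala.retain(...)` on the `ConcurrentHashMap` without `scala.collection.JavaConverters._` in scope. A minimal illustration of the conversion the import enables:

    import java.util.concurrent.ConcurrentHashMap

    // Without this import, the `asScala` call below does not compile,
    // which is exactly the error this commit fixes.
    import scala.collection.JavaConverters._

    object AsScalaExample {
      def main(args: Array[String]): Unit = {
        val blacklist = new ConcurrentHashMap[String, Long]
        blacklist.put("eventlog-1", 42L)
        // asScala wraps the Java map as a mutable Scala Map, making
        // collection operations such as retain(...) available.
        blacklist.asScala.retain((_, insertedAt) => insertedAt >= 0L)
        println(blacklist.size()) // 1
      }
    }

(`JavaConverters` is the Scala 2.11/2.12 API that Spark targets here; in 2.13 it was superseded by `scala.jdk.CollectionConverters`.)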
From ddd5c50c7b6de8663ac6412a82bcb8ad786db2c4 Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Tue, 7 Aug 2018 15:06:43 +0200
Subject: [PATCH 3/4] fix ut failures

---
 .../deploy/history/FsHistoryProvider.scala      | 17 ++++++++---------
 .../deploy/history/FsHistoryProviderSuite.scala | 11 +----------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index a04fb8d600e18..66edaf1fbc930 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -24,7 +24,6 @@ import java.util.zip.{ZipEntry, ZipOutputStream}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.concurrent.ExecutionException
 import scala.xml.Node
 
 import com.google.common.io.ByteStreams
@@ -358,14 +357,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         logDebug(s"New/updated attempts found: ${logInfos.size} ${logInfos.map(_.getPath)}")
       }
 
-      var tasks = mutable.ListBuffer[(Future[Unit], Path)]()
+      var tasks = mutable.ListBuffer[Future[_]]()
 
       try {
         for (file <- logInfos) {
-          val task: Future[Unit] = replayExecutor.submit(new Runnable {
+          tasks += replayExecutor.submit(new Runnable {
             override def run(): Unit = mergeApplicationListing(file)
           }, Unit)
-          tasks += (task -> file.getPath)
         }
       } catch {
         // let the iteration over logInfos break, since an exception on
@@ -378,7 +376,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
       pendingReplayTasksCount.addAndGet(tasks.size)
 
-      tasks.foreach { case (task, path) =>
+      tasks.foreach { task =>
         try {
           // Wait for all tasks to finish. This makes sure that checkForLogs
           // is not scheduled again while some tasks are already running in
@@ -387,10 +385,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         } catch {
           case e: InterruptedException =>
             throw e
-          case e: ExecutionException if e.getCause.isInstanceOf[AccessControlException] =>
-            // We don't have read permissions on the log file
-            logWarning(s"Unable to read log $path", e.getCause)
-            blacklist(path)
           case e: Exception =>
             logError("Exception while merging application listings", e)
         } finally {
@@ -507,6 +501,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         }
       }
     } catch {
+      case e: AccessControlException =>
+        // We don't have read permissions on the log file
+        logWarning(s"Unable to read log ${fileStatus.getPath}", e.getCause)
+        blacklist(fileStatus.getPath)
+        None
       case e: Exception =>
         logError(
           s"Exception encountered when attempting to load application log ${fileStatus.getPath}",

diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 6005deb3fbf04..302851c83e9f9 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -137,14 +137,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     // setReadable(...) does not work on Windows. Please refer JDK-6728842.
     assume(!Utils.isWindows)
 
-    class TestFsHistoryProvider extends FsHistoryProvider(createTestConf()) {
-      var mergeApplicationListingCall = 0
-      override protected def mergeApplicationListing(fileStatus: FileStatus): Unit = {
-        super.mergeApplicationListing(fileStatus)
-        mergeApplicationListingCall += 1
-      }
-    }
-    val provider = new TestFsHistoryProvider
+    val provider = new FsHistoryProvider(createTestConf())
 
     val logFile1 = newLogFile("new1", None, inProgress = false)
     writeFile(logFile1, true, None,
@@ -161,8 +154,6 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     updateAndCheck(provider) { list =>
       list.size should be (1)
     }
-
-    provider.mergeApplicationListingCall should be (1)
   }
 
   test("history file is renamed from inprogress to completed") {

From 16233d181b0a61d6cd45a7dc42d49a8905c964ea Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Tue, 7 Aug 2018 15:07:36 +0200
Subject: [PATCH 4/4] clean diff

---
 .../org/apache/spark/deploy/history/FsHistoryProvider.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 66edaf1fbc930..12528cde2af6a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -363,7 +363,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       for (file <- logInfos) {
         tasks += replayExecutor.submit(new Runnable {
           override def run(): Unit = mergeApplicationListing(file)
-        }, Unit)
+        })
       }
     } catch {
       // let the iteration over logInfos break, since an exception on