diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index fa3c97c6957b5..b04064cc7cc96 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -231,7 +231,15 @@ private[spark] class Client( if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") driverPodCompletedLatch.await() - logInfo(s"Application $kubernetesAppId finished.") + + val exitMessage = loggingWatch.getDriverPodExitStatus match { + case (Some(error), _) => s"failed with error: $error" + case (None, 0) => "finished successfully" + case (None, code) => + s"failed with exit code $code. You may want to check the driver pod logs." + } + + logInfo(s"Application $kubernetesAppId $exitMessage") } else { logInfo(s"Application $kubernetesAppId successfully launched.") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 537bcccaa1458..94de9e754f305 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -52,6 +52,9 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL private def status: String = pod.map(_.getStatus().getContainerStatuses().toString()) .getOrElse("unknown") + private var driverPodExitCode: Int = 0 + private var driverPodErrorMessage: Option[String] = None + def start(): Unit = { if (interval > 0) { scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS) @@ -62,14 +65,17 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL this.pod = Option(pod) action match { case Action.DELETED => + driverPodErrorMessage = Some("the driver pod was deleted") closeWatch() case Action.ERROR => + driverPodErrorMessage = Some("error happened with the driver pod") closeWatch() case _ => logLongStatus() if (hasCompleted()) { + driverPodExitCode = getDriverPodExitCode closeWatch() } } @@ -129,4 +135,10 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL s"\n\t $k: $newValue" }.mkString("") } + + def getDriverPodExitCode: Int = { + pod.get.getStatus().getContainerStatuses().asScala.last.getState.getTerminated.getExitCode + } + + def getDriverPodExitStatus: (Option[String], Int) = (driverPodErrorMessage, driverPodExitCode) }