
Commit 3d25f17

Guard against double-close() of RecordReaders.
1 parent cb4e29f commit 3d25f17

4 files changed: +44 -25 lines changed

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 15 additions & 8 deletions
@@ -257,8 +257,21 @@ class HadoopRDD[K, V](
       }
 
       override def close() {
-        try {
-          reader.close()
+        if (reader != null) {
+          // Close the reader and release it. Note: it's very important that we don't close the
+          // reader more than once, since that exposes us to MAPREDUCE-5918 when running against
+          // Hadoop 1.x and older Hadoop 2.x releases. That bug can lead to non-deterministic
+          // corruption issues when reading compressed input.
+          try {
+            reader.close()
+          } catch {
+            case e: Exception =>
+              if (!ShutdownHookManager.inShutdown()) {
+                logWarning("Exception in RecordReader.close()", e)
+              }
+          } finally {
+            reader = null
+          }
           if (bytesReadCallback.isDefined) {
             inputMetrics.updateBytesRead()
           } else if (split.inputSplit.value.isInstanceOf[FileSplit] ||
@@ -272,12 +285,6 @@ class HadoopRDD[K, V](
                 logWarning("Unable to get input size to set InputMetrics for task", e)
             }
           }
-        } catch {
-          case e: Exception => {
-            if (!ShutdownHookManager.inShutdown()) {
-              logWarning("Exception in RecordReader.close()", e)
-            }
-          }
         }
       }
     }
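
What the new code does: close() is now guarded by a null check, and the reader reference is cleared in a finally block, so the underlying Hadoop RecordReader.close() can run at most once no matter how many times Spark's close() is invoked. Below is a minimal, self-contained sketch of that close-once pattern; CountingReader and ReaderHolder are hypothetical stand-ins for illustration and are not Spark code.

// Sketch only: demonstrates the guard-and-null-out pattern from the diff above.
object CloseOnceExample {

  // Stand-in for a Hadoop RecordReader; counts how often close() is invoked.
  class CountingReader {
    var closeCalls = 0
    def close(): Unit = { closeCalls += 1 }
  }

  class ReaderHolder(private var reader: CountingReader) {
    // Safe to call repeatedly: after the first call the reference is null,
    // so the underlying close() can never run twice (the MAPREDUCE-5918 hazard).
    def close(): Unit = {
      if (reader != null) {
        try {
          reader.close()
        } catch {
          case e: Exception =>
            // Spark logs a warning here; println keeps the sketch dependency-free.
            println(s"Exception in RecordReader.close(): $e")
        } finally {
          reader = null
        }
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val underlying = new CountingReader
    val holder = new ReaderHolder(underlying)
    holder.close()
    holder.close() // second call is a no-op
    assert(underlying.closeCalls == 1)
  }
}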

core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala

Lines changed: 13 additions & 8 deletions
@@ -158,8 +158,19 @@ class NewHadoopRDD[K, V](
       }
 
       private def close() {
-        try {
-          reader.close()
+        if (reader != null) {
+          // Close the reader and release it. Note: it's very important that we don't close the
+          // reader more than once, since that exposes us to MAPREDUCE-5918 when running against
+          // Hadoop 1.x and older Hadoop 2.x releases. That bug can lead to non-deterministic
+          // corruption issues when reading compressed input.
+          try {
+            reader.close()
+          } catch {
+            case e: Exception =>
+              if (!ShutdownHookManager.inShutdown()) {
+                logWarning("Exception in RecordReader.close()", e)
+              }
+          } finally {
+            reader = null
+          }
           if (bytesReadCallback.isDefined) {
             inputMetrics.updateBytesRead()
           } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] ||
@@ -173,12 +184,6 @@ class NewHadoopRDD[K, V](
                 logWarning("Unable to get input size to set InputMetrics for task", e)
             }
           }
-        } catch {
-          case e: Exception => {
-            if (!ShutdownHookManager.inShutdown()) {
-              logWarning("Exception in RecordReader.close()", e)
-            }
-          }
         }
       }
     }

core/src/main/scala/org/apache/spark/util/NextIterator.scala

Lines changed: 3 additions & 1 deletion
@@ -60,8 +60,10 @@ private[spark] abstract class NextIterator[U] extends Iterator[U] {
    */
   def closeIfNeeded() {
     if (!closed) {
-      close()
+      // Note: it's important that we set closed = true before calling close(), since setting it
+      // afterwards would permit us to call close() multiple times if close() threw an exception.
       closed = true
+      close()
     }
   }
 
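
The NextIterator change is purely about ordering: the closed flag is flipped before close() runs, so a close() that throws is still never attempted a second time through closeIfNeeded(). A small standalone sketch of that ordering, using a hypothetical ThrowingResource in place of a real reader:

// Sketch only: shows why the flag must be set before close() is called.
object CloseIfNeededExample {

  // Stand-in for a resource whose close() always fails.
  class ThrowingResource {
    var closeAttempts = 0
    def close(): Unit = {
      closeAttempts += 1
      throw new RuntimeException("close failed")
    }
  }

  class Guard(resource: ThrowingResource) {
    private var closed = false
    def closeIfNeeded(): Unit = {
      if (!closed) {
        closed = true     // set first, as in the patched NextIterator
        resource.close()  // may throw; the flag is already set
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val res = new ThrowingResource
    val guard = new Guard(res)
    try guard.closeIfNeeded() catch { case _: RuntimeException => () }
    try guard.closeIfNeeded() catch { case _: RuntimeException => () }
    assert(res.closeAttempts == 1) // with the old ordering this would be 2
  }
}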

sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala

Lines changed: 13 additions & 8 deletions
@@ -178,8 +178,19 @@ private[sql] class SqlNewHadoopRDD[K, V](
       }
 
       private def close() {
-        try {
-          reader.close()
+        if (reader != null) {
+          // Close the reader and release it. Note: it's very important that we don't close the
+          // reader more than once, since that exposes us to MAPREDUCE-5918 when running against
+          // Hadoop 1.x and older Hadoop 2.x releases. That bug can lead to non-deterministic
+          // corruption issues when reading compressed input.
+          try {
+            reader.close()
+          } catch {
+            case e: Exception =>
+              if (!ShutdownHookManager.inShutdown()) {
+                logWarning("Exception in RecordReader.close()", e)
+              }
+          } finally {
+            reader = null
+          }
           if (bytesReadCallback.isDefined) {
             inputMetrics.updateBytesRead()
           } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] ||
@@ -193,12 +204,6 @@ private[sql] class SqlNewHadoopRDD[K, V](
                 logWarning("Unable to get input size to set InputMetrics for task", e)
             }
           }
-        } catch {
-          case e: Exception => {
-            if (!ShutdownHookManager.inShutdown()) {
-              logWarning("Exception in RecordReader.close()", e)
-            }
-          }
         }
       }
     }
