-
Notifications
You must be signed in to change notification settings - Fork 28.9k
SPARK-19794 Release HDFS Client after read/write checkpoint #17135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -181,21 +181,26 @@ private[spark] object ReliableCheckpointRDD extends Logging { | |
| serializeStream.writeAll(iterator) | ||
| } { | ||
| serializeStream.close() | ||
| fileOutputStream.close() | ||
| } | ||
|
|
||
| if (!fs.rename(tempOutputPath, finalOutputPath)) { | ||
| if (!fs.exists(finalOutputPath)) { | ||
| logInfo(s"Deleting tempOutputPath $tempOutputPath") | ||
| fs.delete(tempOutputPath, false) | ||
| throw new IOException("Checkpoint failed: failed to save output of task: " + | ||
| s"${ctx.attemptNumber()} and final output path does not exist: $finalOutputPath") | ||
| } else { | ||
| // Some other copy of this task must've finished before us and renamed it | ||
| logInfo(s"Final output path $finalOutputPath already exists; not overwriting it") | ||
| if (!fs.delete(tempOutputPath, false)) { | ||
| logWarning(s"Error deleting ${tempOutputPath}") | ||
| try { | ||
| if (!fs.rename(tempOutputPath, finalOutputPath)) { | ||
| if (!fs.exists(finalOutputPath)) { | ||
| logInfo(s"Deleting tempOutputPath $tempOutputPath") | ||
| fs.delete(tempOutputPath, false) | ||
| throw new IOException("Checkpoint failed: failed to save output of task: " + | ||
| s"${ctx.attemptNumber()} and final output path does not exist: $finalOutputPath") | ||
| } else { | ||
| // Some other copy of this task must've finished before us and renamed it | ||
| logInfo(s"Final output path $finalOutputPath already exists; not overwriting it") | ||
| if (!fs.delete(tempOutputPath, false)) { | ||
| logWarning(s"Error deleting ${tempOutputPath}") | ||
| } | ||
| } | ||
| } | ||
| } finally { | ||
| fs.close() | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -216,6 +221,8 @@ private[spark] object ReliableCheckpointRDD extends Logging { | |
| serializeStream.writeObject(partitioner) | ||
| } { | ||
| serializeStream.close() | ||
| fileOutputStream.close() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto, this is OK if |
||
| fs.close() | ||
| } | ||
| logDebug(s"Written partitioner to $partitionerFilePath") | ||
| } catch { | ||
|
|
@@ -248,6 +255,7 @@ private[spark] object ReliableCheckpointRDD extends Logging { | |
| } | ||
| } { | ||
| fileInputStream.close() | ||
| fs.close() | ||
| } | ||
|
|
||
| logDebug(s"Read partitioner from $partitionerFilePath") | ||
|
|
@@ -279,8 +287,13 @@ private[spark] object ReliableCheckpointRDD extends Logging { | |
|
|
||
| // Register an on-task-completion callback to close the input stream. | ||
| context.addTaskCompletionListener(context => deserializeStream.close()) | ||
|
|
||
| deserializeStream.asIterator.asInstanceOf[Iterator[T]] | ||
| Utils.tryWithSafeFinally { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you can close it here, right? you're returning an iterator on the stream
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code will introduce issue, Also please look at L289, it already takes care of |
||
| deserializeStream.asIterator.asInstanceOf[Iterator[T]] | ||
| } { | ||
| deserializeStream.close() | ||
| fileInputStream.close() | ||
| fs.close() | ||
| } | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given that this doesn't encompass the span of usage for
fs-- better to just callfs.close()at the end and not worry about manually closing in an error case? or expand the try-finally?Actually, I am not sure we are supposed to call
FileSystem.close()because they are shared instances, cached and reused across the whole application.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed with @srowen ,
FileSystemis a cached object, closing it means removed it from cache. I don't think we need to call this explicitly. Because by default it is designed to be shared.