[SPARK-53063][CORE] Implement and call new APIs in FileCommitProtocol instead of the deprecated #51772
**FileCommitProtocol.scala**

```diff
@@ -20,6 +20,7 @@ package org.apache.spark.internal.io
 import org.apache.hadoop.fs._
 import org.apache.hadoop.mapreduce._
 
+import org.apache.spark.SparkException
 import org.apache.spark.annotation.Unstable
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
```
```diff
@@ -96,7 +97,9 @@ abstract class FileCommitProtocol extends Logging {
    * guarantees that files written by different tasks will not conflict.
    */
   @deprecated("use newTaskTempFile(..., spec: FileNameSpec) instead", "3.3.0")
-  def newTaskTempFile(taskContext: TaskAttemptContext, dir: Option[String], ext: String): String
+  def newTaskTempFile(taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = {
+    throw SparkException.mustOverrideOneMethodError("newTaskTempFile")
+  }
 
   /**
    * Notifies the commit protocol to add a new file, and gets back the full path that should be
```

**Member:** In other words, I don't think this is required to achieve your goal.

**Contributor:** I agree, it is not clear why these changes are being made.
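For background on what a throwing default can buy here (hedged, not stated in the PR): the non-deprecated `newTaskTempFile(..., spec: FileNameSpec)` overload presumably falls back to the `ext`-based one, so a committer only has to override one of the two signatures; giving the deprecated overload a body means a subclass that overrides neither fails with an explicit `mustOverrideOneMethodError` at runtime. A minimal sketch of that pairing, in which the fallback shown in the spec overload is an assumption for illustration and not code from this PR:

```scala
// Sketch of the "override at least one of the two overloads" pattern; the fallback in the
// FileNameSpec overload below is assumed for illustration and is not taken from this PR.
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.SparkException
import org.apache.spark.internal.io.FileNameSpec

abstract class PairedOverloadsSketch {
  // Deprecated overload: no longer abstract, so implementors may simply not define it.
  @deprecated("use newTaskTempFile(..., spec: FileNameSpec) instead", "3.3.0")
  def newTaskTempFile(ctx: TaskAttemptContext, dir: Option[String], ext: String): String =
    throw SparkException.mustOverrideOneMethodError("newTaskTempFile")

  // Newer overload: assumed here to fall back to the deprecated one, so overriding either
  // overload is enough; overriding neither raises the explicit error above at runtime.
  def newTaskTempFile(ctx: TaskAttemptContext, dir: Option[String], spec: FileNameSpec): String =
    newTaskTempFile(ctx, dir, spec.suffix)
}
```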
```diff
@@ -135,7 +138,9 @@ abstract class FileCommitProtocol extends Logging {
    */
   @deprecated("use newTaskTempFileAbsPath(..., spec: FileNameSpec) instead", "3.3.0")
   def newTaskTempFileAbsPath(
-      taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String
+      taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = {
+    throw SparkException.mustOverrideOneMethodError("newTaskTempFileAbsPath")
+  }
 
   /**
    * Similar to newTaskTempFile(), but allows files to committed to an absolute output location.
```

**Member:** ditto.
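Taken together with the PR's goal of calling the `FileNameSpec`-based APIs, the practical effect is that a custom commit protocol can implement only the newer overloads. A hypothetical sketch of such a committer; the class name, constructor parameters, and `buildName` helper are made up, and the class is left abstract so the unrelated `FileCommitProtocol` members can stay unimplemented:

```scala
// Hypothetical committer, for illustration only: it overrides just the FileNameSpec-based
// overloads and never touches the deprecated ext-based ones.
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.internal.io.{FileCommitProtocol, FileNameSpec}

abstract class SketchCommitProtocol(jobId: String, outputPath: String)
  extends FileCommitProtocol {

  private def buildName(spec: FileNameSpec): String =
    s"${spec.prefix}part-$jobId${spec.suffix}"

  override def newTaskTempFile(
      taskContext: TaskAttemptContext, dir: Option[String], spec: FileNameSpec): String = {
    val name = buildName(spec)
    dir.map(d => s"$outputPath/$d/$name").getOrElse(s"$outputPath/$name")
  }

  override def newTaskTempFileAbsPath(
      taskContext: TaskAttemptContext, absoluteDir: String, spec: FileNameSpec): String =
    s"$absoluteDir/${buildName(spec)}"

  // setupJob, commitJob, abortJob, setupTask, commitTask, abortTask, etc. are omitted,
  // which is why this sketch is declared abstract.
}
```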
**QueryCompilationErrors.scala**

```diff
@@ -21,7 +21,7 @@ import java.util.Locale
 
 import org.apache.hadoop.fs.Path
 
-import org.apache.spark.{SPARK_DOC_ROOT, SparkException, SparkRuntimeException, SparkThrowable, SparkUnsupportedOperationException}
+import org.apache.spark.{SPARK_DOC_ROOT, SparkException, SparkThrowable, SparkUnsupportedOperationException}
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, FunctionIdentifier, InternalRow, QualifiedTableName, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, FunctionAlreadyExistsException, NamespaceAlreadyExistsException, NoSuchFunctionException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException, Star, TableAlreadyExistsException, UnresolvedRegex}
```
```diff
@@ -4203,9 +4203,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
   def mustOverrideOneMethodError(methodName: String): RuntimeException = {
     val msg = s"You must override one `$methodName`. It's preferred to not override the " +
       "deprecated one."
-    new SparkRuntimeException(
-      "INTERNAL_ERROR",
-      Map("message" -> msg))
+    SparkException.mustOverrideOneMethodError(msg)
   }
 
   def cannotAssignEventTimeColumn(): Throwable = {
```

**Contributor:** This should be …
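The replacement delegates construction of the error to a new `SparkException.mustOverrideOneMethodError` helper, presumably so that core classes such as `FileCommitProtocol` can raise the same error without depending on the SQL module. A hedged sketch of one plausible shape for that helper, simply mirroring the `SparkRuntimeException` construction that the removed lines used (the helper actually added by the PR may differ):

```scala
// Hedged sketch only: one plausible shape for the core-side helper called above,
// reusing the INTERNAL_ERROR error class exactly as the removed code did.
import org.apache.spark.SparkRuntimeException

object CoreErrorHelperSketch {
  def mustOverrideOneMethodError(message: String): SparkRuntimeException =
    new SparkRuntimeException("INTERNAL_ERROR", Map("message" -> message))
}
```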
**FileFormatDataWriter.scala**

```diff
@@ -172,7 +172,7 @@ class SingleDirectoryDataWriter(
     val currentPath = committer.newTaskTempFile(
       taskAttemptContext,
       None,
-      f"-c$fileCounter%03d" + ext)
+      FileNameSpec("", f"-c$fileCounter%03d" + ext))
 
     currentWriter = description.outputWriterFactory.newInstance(
       path = currentPath,
```

**Member:** This looks like a new independent change, too.

**Member (author):** Calling the newer `newTaskTempFile` with a spec is the goal of this PR.

**Member:** The PR title is not~ If this is the goal of this PR, please remove the …

**Member (author):** @dongjoon-hyun, I got your point. I've changed the title to …

**Member:** Thank you. I trust your decision. Initially, I wanted to remove the default implementation, …

**Member (author):** Thank you @dongjoon-hyun
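The call-site change itself is mechanical: the deprecated overload took a bare extension string, while the non-deprecated one takes a `FileNameSpec`. A small sketch of the equivalence, assuming `FileNameSpec(prefix, suffix)` as used in the diff above; the concrete values are made up for illustration:

```scala
import org.apache.spark.internal.io.FileNameSpec

object FileNameSpecMigrationSketch extends App {
  // Made-up values, for illustration only.
  val fileCounter = 0
  val ext = ".c000.snappy.parquet"
  val suffix = f"-c$fileCounter%03d" + ext // "-c000.c000.snappy.parquet"

  // Deprecated overload: committer.newTaskTempFile(taskAttemptContext, None, suffix)
  // Newer overload:      committer.newTaskTempFile(taskAttemptContext, None, FileNameSpec("", suffix))
  // An empty prefix plus the same suffix keeps the generated file name unchanged.
  val spec = FileNameSpec("", suffix)
  assert(spec.prefix.isEmpty && spec.suffix == suffix)
}
```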