-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-18467][SQL] Extracts method for preparing arguments from StaticInvoke, Invoke and NewInstance and modify to short circuit if arguments have null when needNullCheck == true.
#15901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
29 commits
Select commit
Hold shift + click to select a range
9bf9aa9
Introduce `InvokeLike` to extract common logic from `StaticInvoke`, `…
ueshin 28f6200
Refactor to remove unneeded null checking and fix nullability of `New…
ueshin 2f30f53
Modify to short circuit if arguments have null when propageteNull == …
ueshin 9ac3f28
Add a comment for `prepareArguments()`.
ueshin 5ad6966
Define a variable for `propagateNull && arguments.exists(_.nullable)`.
ueshin a0ac177
Add a missing param comment for `propagateNull`.
ueshin 757d33e
Rename variable from `argsHaveNull` to `containsNullInArguments`.
ueshin 240fde4
Modify `InvokeLike` not to extend `Expression` because no need to ext…
ueshin 6f6e0b3
Fix comments.
ueshin 2bd6e50
Modify for readability.
ueshin bcb93db
Make `InvokeLike` extend `Expression` and `NonSQLExpression`.
ueshin d448b60
Use `propagatingNull`.
ueshin 8894a96
Use `needNullCheck` instread of `propagatingNull`.
ueshin ca4558f
Modify `prepareArguments()` to return the result of argument null che…
ueshin 4d9a037
Skip evaluate arguments if `obj.isNull == true` in `Invoke`.
ueshin ebb1241
Modify to prepare for further optimization.
ueshin 99a59b2
Revert a part of last 2 commits.
ueshin 243888a
Revert "Revert a part of last 2 commits."
ueshin e12a9bd
Revert "Modify to prepare for further optimization."
ueshin 2c52d91
Use `obj.isNull` instead of `ev.isNull`.
ueshin 43d2693
Revert and use a simple case.
ueshin bd9c09f
Remove unused argument.
ueshin 501095f
Remove unneeded if.
ueshin 1baac55
Modify generated code of `Invoke`.
ueshin 831c521
Use resultIsNull as ev.isNull in `NewInstance`.
ueshin 0b210f8
Remove unneeded zipWithIndex.
ueshin f8acda6
Use local variables for `resultIsNull`s and `argValue`s.
ueshin fe2871c
Revert "Use local variables for `resultIsNull`s and `argValue`s."
ueshin c88a1ff
Optimize a code in `StaticInvoke`.
ueshin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,78 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo | |
| import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} | ||
| import org.apache.spark.sql.types._ | ||
|
|
||
| /** | ||
| * Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. | ||
| */ | ||
| trait InvokeLike extends Expression with NonSQLExpression { | ||
|
|
||
| def arguments: Seq[Expression] | ||
|
|
||
| def propagateNull: Boolean | ||
|
|
||
| protected lazy val needNullCheck: Boolean = propagateNull && arguments.exists(_.nullable) | ||
|
|
||
| /** | ||
| * Prepares codes for arguments. | ||
| * | ||
| * - generate codes for argument. | ||
| * - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments. | ||
| * - avoid some of nullabilty checking which are not needed because the expression is not | ||
| * nullable. | ||
| * - when needNullCheck == true, short circuit if we found one of arguments is null because | ||
| * preparing rest of arguments can be skipped in the case. | ||
| * | ||
| * @param ctx a [[CodegenContext]] | ||
| * @return (code to prepare arguments, argument string, result of argument null check) | ||
| */ | ||
| def prepareArguments(ctx: CodegenContext): (String, String, String) = { | ||
|
|
||
| val resultIsNull = if (needNullCheck) { | ||
| val resultIsNull = ctx.freshName("resultIsNull") | ||
| ctx.addMutableState("boolean", resultIsNull, "") | ||
| resultIsNull | ||
| } else { | ||
| "false" | ||
| } | ||
| val argValues = arguments.map { e => | ||
| val argValue = ctx.freshName("argValue") | ||
| ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
| argValue | ||
| } | ||
|
|
||
| val argCodes = if (needNullCheck) { | ||
| val reset = s"$resultIsNull = false;" | ||
| val argCodes = arguments.zipWithIndex.map { case (e, i) => | ||
| val expr = e.genCode(ctx) | ||
| val updateResultIsNull = if (e.nullable) { | ||
| s"$resultIsNull = ${expr.isNull};" | ||
| } else { | ||
| "" | ||
| } | ||
| s""" | ||
| if (!$resultIsNull) { | ||
| ${expr.code} | ||
| $updateResultIsNull | ||
| ${argValues(i)} = ${expr.value}; | ||
| } | ||
| """ | ||
| } | ||
| reset +: argCodes | ||
| } else { | ||
| arguments.zipWithIndex.map { case (e, i) => | ||
| val expr = e.genCode(ctx) | ||
| s""" | ||
| ${expr.code} | ||
| ${argValues(i)} = ${expr.value}; | ||
| """ | ||
| } | ||
| } | ||
| val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
|
|
||
| (argCode, argValues.mkString(", "), resultIsNull) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Invokes a static function, returning the result. By default, any of the arguments being null | ||
| * will result in returning null instead of calling the function. | ||
|
|
@@ -50,7 +122,7 @@ case class StaticInvoke( | |
| dataType: DataType, | ||
| functionName: String, | ||
| arguments: Seq[Expression] = Nil, | ||
| propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
| propagateNull: Boolean = true) extends InvokeLike { | ||
|
|
||
| val objectName = staticObject.getName.stripSuffix("$") | ||
|
|
||
|
|
@@ -62,16 +134,10 @@ case class StaticInvoke( | |
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val javaType = ctx.javaType(dataType) | ||
| val argGen = arguments.map(_.genCode(ctx)) | ||
| val argString = argGen.map(_.value).mkString(", ") | ||
|
|
||
| val callFunc = s"$objectName.$functionName($argString)" | ||
| val (argCode, argString, resultIsNull) = prepareArguments(ctx) | ||
|
|
||
| val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
| s"boolean ${ev.isNull} = ${argGen.map(_.isNull).mkString(" || ")};" | ||
| } else { | ||
| s"boolean ${ev.isNull} = false;" | ||
| } | ||
| val callFunc = s"$objectName.$functionName($argString)" | ||
|
|
||
| // If the function can return null, we do an extra check to make sure our null bit is still set | ||
| // correctly. | ||
|
|
@@ -82,9 +148,9 @@ case class StaticInvoke( | |
| } | ||
|
|
||
| val code = s""" | ||
| ${argGen.map(_.code).mkString("\n")} | ||
| $setIsNull | ||
| final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; | ||
| $argCode | ||
| boolean ${ev.isNull} = $resultIsNull; | ||
| final $javaType ${ev.value} = $resultIsNull ? ${ctx.defaultValue(dataType)} : $callFunc; | ||
| $postNullCheck | ||
| """ | ||
| ev.copy(code = code) | ||
|
|
@@ -103,13 +169,15 @@ case class StaticInvoke( | |
| * @param functionName The name of the method to call. | ||
| * @param dataType The expected return type of the function. | ||
| * @param arguments An optional list of expressions, whos evaluation will be passed to the function. | ||
| * @param propagateNull When true, and any of the arguments is null, null will be returned instead | ||
| * of calling the function. | ||
| */ | ||
| case class Invoke( | ||
| targetObject: Expression, | ||
| functionName: String, | ||
| dataType: DataType, | ||
| arguments: Seq[Expression] = Nil, | ||
| propagateNull: Boolean = true) extends Expression with NonSQLExpression { | ||
| propagateNull: Boolean = true) extends InvokeLike { | ||
|
|
||
| override def nullable: Boolean = true | ||
| override def children: Seq[Expression] = targetObject +: arguments | ||
|
|
@@ -131,8 +199,8 @@ case class Invoke( | |
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val javaType = ctx.javaType(dataType) | ||
| val obj = targetObject.genCode(ctx) | ||
| val argGen = arguments.map(_.genCode(ctx)) | ||
| val argString = argGen.map(_.value).mkString(", ") | ||
|
|
||
| val (argCode, argString, resultIsNull) = prepareArguments(ctx) | ||
|
|
||
| val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive | ||
| val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty | ||
|
|
@@ -164,28 +232,26 @@ case class Invoke( | |
| """ | ||
| } | ||
|
|
||
| val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
| s"boolean ${ev.isNull} = ${obj.isNull} || ${argGen.map(_.isNull).mkString(" || ")};" | ||
| } else { | ||
| s"boolean ${ev.isNull} = ${obj.isNull};" | ||
| } | ||
|
|
||
| // If the function can return null, we do an extra check to make sure our null bit is still set | ||
| // correctly. | ||
| val postNullCheck = if (ctx.defaultValue(dataType) == "null") { | ||
| s"${ev.isNull} = ${ev.value} == null;" | ||
| } else { | ||
| "" | ||
| } | ||
|
|
||
| val code = s""" | ||
| ${obj.code} | ||
| ${argGen.map(_.code).mkString("\n")} | ||
| $setIsNull | ||
| boolean ${ev.isNull} = true; | ||
| $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; | ||
| if (!${ev.isNull}) { | ||
| $evaluate | ||
| if (!${obj.isNull}) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Anyway, I think this new code looks clearer than before, what do you think? @ueshin
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I agree with you. Thank you for your suggestion. |
||
| $argCode | ||
| ${ev.isNull} = $resultIsNull; | ||
| if (!${ev.isNull}) { | ||
| $evaluate | ||
| } | ||
| $postNullCheck | ||
| } | ||
| $postNullCheck | ||
| """ | ||
| ev.copy(code = code) | ||
| } | ||
|
|
@@ -223,10 +289,10 @@ case class NewInstance( | |
| arguments: Seq[Expression], | ||
| propagateNull: Boolean, | ||
| dataType: DataType, | ||
| outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { | ||
| outerPointer: Option[() => AnyRef]) extends InvokeLike { | ||
| private val className = cls.getName | ||
|
|
||
| override def nullable: Boolean = propagateNull | ||
| override def nullable: Boolean = needNullCheck | ||
|
|
||
| override def children: Seq[Expression] = arguments | ||
|
|
||
|
|
@@ -245,52 +311,25 @@ case class NewInstance( | |
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val javaType = ctx.javaType(dataType) | ||
| val argIsNulls = ctx.freshName("argIsNulls") | ||
| ctx.addMutableState("boolean[]", argIsNulls, | ||
| s"$argIsNulls = new boolean[${arguments.size}];") | ||
| val argValues = arguments.zipWithIndex.map { case (e, i) => | ||
| val argValue = ctx.freshName("argValue") | ||
| ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") | ||
| argValue | ||
| } | ||
|
|
||
| val argCodes = arguments.zipWithIndex.map { case (e, i) => | ||
| val expr = e.genCode(ctx) | ||
| expr.code + s""" | ||
| $argIsNulls[$i] = ${expr.isNull}; | ||
| ${argValues(i)} = ${expr.value}; | ||
| """ | ||
| } | ||
| val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) | ||
| val (argCode, argString, resultIsNull) = prepareArguments(ctx) | ||
|
|
||
| val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) | ||
|
|
||
| var isNull = ev.isNull | ||
| val setIsNull = if (propagateNull && arguments.nonEmpty) { | ||
| s""" | ||
| boolean $isNull = false; | ||
| for (int idx = 0; idx < ${arguments.length}; idx++) { | ||
| if ($argIsNulls[idx]) { $isNull = true; break; } | ||
| } | ||
| """ | ||
| } else { | ||
| isNull = "false" | ||
| "" | ||
| } | ||
| ev.isNull = resultIsNull | ||
|
|
||
| val constructorCall = outer.map { gen => | ||
| s"""${gen.value}.new ${cls.getSimpleName}(${argValues.mkString(", ")})""" | ||
| s"${gen.value}.new ${cls.getSimpleName}($argString)" | ||
| }.getOrElse { | ||
| s"new $className(${argValues.mkString(", ")})" | ||
| s"new $className($argString)" | ||
| } | ||
|
|
||
| val code = s""" | ||
| $argCode | ||
| ${outer.map(_.code).getOrElse("")} | ||
| $setIsNull | ||
| final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; | ||
| """ | ||
| ev.copy(code = code, isNull = isNull) | ||
| final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(javaType)} : $constructorCall; | ||
| """ | ||
| ev.copy(code = code) | ||
| } | ||
|
|
||
| override def toString: String = s"newInstance($cls)" | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how about:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I'll use it.