From 9bf9aa9d3f376e65132fb3551805ce59958f73c0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 15 Nov 2016 18:18:44 +0900 Subject: [PATCH 01/29] Introduce `InvokeLike` to extract common logic from `StaticInvoke`, `Invoke` and `NewInstance`. --- .../expressions/objects/objects.scala | 106 ++++++++++-------- 1 file changed, 60 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 50e2ac3c36d93..c73315829a5c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -32,6 +32,49 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types._ +/** + * Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. + */ +trait InvokeLike extends Expression { + + def arguments: Seq[Expression] + + def propagateNull: Boolean + + def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { + + val argIsNulls = ctx.freshName("argIsNulls") + ctx.addMutableState("boolean[]", argIsNulls, + s"$argIsNulls = new boolean[${arguments.size}];") + val argValues = arguments.zipWithIndex.map { case (e, i) => + val argValue = ctx.freshName("argValue") + ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") + argValue + } + + val argCodes = arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + expr.code + s""" + $argIsNulls[$i] = ${expr.isNull}; + ${argValues(i)} = ${expr.value}; + """ + } + val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) + + val setIsNull = if (propagateNull && arguments.nonEmpty) { + s""" + for (int idx = 0; idx < ${arguments.length}; idx++) { + if ($argIsNulls[idx]) { ${ev.isNull} = true; break; } + } + """ + } else { + "" + } + + (argCode, argValues.mkString(", "), setIsNull) + } +} + /** * Invokes a static function, returning the result. By default, any of the arguments being null * will result in returning null instead of calling the function. @@ -50,7 +93,7 @@ case class StaticInvoke( dataType: DataType, functionName: String, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with NonSQLExpression { + propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { val objectName = staticObject.getName.stripSuffix("$") @@ -62,16 +105,10 @@ case class StaticInvoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val argGen = arguments.map(_.genCode(ctx)) - val argString = argGen.map(_.value).mkString(", ") - val callFunc = s"$objectName.$functionName($argString)" + val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) - val setIsNull = if (propagateNull && arguments.nonEmpty) { - s"boolean ${ev.isNull} = ${argGen.map(_.isNull).mkString(" || ")};" - } else { - s"boolean ${ev.isNull} = false;" - } + val callFunc = s"$objectName.$functionName($argString)" // If the function can return null, we do an extra check to make sure our null bit is still set // correctly. @@ -82,7 +119,8 @@ case class StaticInvoke( } val code = s""" - ${argGen.map(_.code).mkString("\n")} + $argCode + boolean ${ev.isNull} = false; $setIsNull final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; $postNullCheck @@ -109,7 +147,7 @@ case class Invoke( functionName: String, dataType: DataType, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with NonSQLExpression { + propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { override def nullable: Boolean = true override def children: Seq[Expression] = targetObject +: arguments @@ -131,8 +169,8 @@ case class Invoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) val obj = targetObject.genCode(ctx) - val argGen = arguments.map(_.genCode(ctx)) - val argString = argGen.map(_.value).mkString(", ") + + val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -164,12 +202,6 @@ case class Invoke( """ } - val setIsNull = if (propagateNull && arguments.nonEmpty) { - s"boolean ${ev.isNull} = ${obj.isNull} || ${argGen.map(_.isNull).mkString(" || ")};" - } else { - s"boolean ${ev.isNull} = ${obj.isNull};" - } - // If the function can return null, we do an extra check to make sure our null bit is still set // correctly. val postNullCheck = if (ctx.defaultValue(dataType) == "null") { @@ -179,7 +211,8 @@ case class Invoke( } val code = s""" ${obj.code} - ${argGen.map(_.code).mkString("\n")} + $argCode + boolean ${ev.isNull} = ${obj.isNull}; $setIsNull $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { @@ -223,7 +256,7 @@ case class NewInstance( arguments: Seq[Expression], propagateNull: Boolean, dataType: DataType, - outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { + outerPointer: Option[() => AnyRef]) extends Expression with InvokeLike with NonSQLExpression { private val className = cls.getName override def nullable: Boolean = propagateNull @@ -245,48 +278,29 @@ case class NewInstance( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val argIsNulls = ctx.freshName("argIsNulls") - ctx.addMutableState("boolean[]", argIsNulls, - s"$argIsNulls = new boolean[${arguments.size}];") - val argValues = arguments.zipWithIndex.map { case (e, i) => - val argValue = ctx.freshName("argValue") - ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") - argValue - } - val argCodes = arguments.zipWithIndex.map { case (e, i) => - val expr = e.genCode(ctx) - expr.code + s""" - $argIsNulls[$i] = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; - """ - } - val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) + val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) var isNull = ev.isNull - val setIsNull = if (propagateNull && arguments.nonEmpty) { - s""" - boolean $isNull = false; - for (int idx = 0; idx < ${arguments.length}; idx++) { - if ($argIsNulls[idx]) { $isNull = true; break; } - } - """ + val prepareIsNull = if (propagateNull && arguments.nonEmpty) { + s"boolean $isNull = false;" } else { isNull = "false" "" } val constructorCall = outer.map { gen => - s"""${gen.value}.new ${cls.getSimpleName}(${argValues.mkString(", ")})""" + s"${gen.value}.new ${cls.getSimpleName}($argString)" }.getOrElse { - s"new $className(${argValues.mkString(", ")})" + s"new $className($argString)" } val code = s""" $argCode ${outer.map(_.code).getOrElse("")} + $prepareIsNull $setIsNull final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; """ From 28f6200f2911cfe757a538b0ef633659a5f9c52f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 15 Nov 2016 18:46:30 +0900 Subject: [PATCH 02/29] Refactor to remove unneeded null checking and fix nullability of `NewInstance`. --- .../expressions/objects/objects.scala | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index c73315829a5c5..c4d30919c223b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -43,9 +43,15 @@ trait InvokeLike extends Expression { def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val argIsNulls = ctx.freshName("argIsNulls") - ctx.addMutableState("boolean[]", argIsNulls, - s"$argIsNulls = new boolean[${arguments.size}];") + val argIsNulls = + if (propagateNull && arguments.exists(_.nullable)) { + val argIsNulls = ctx.freshName("argIsNulls") + ctx.addMutableState("boolean[]", argIsNulls, + s"$argIsNulls = new boolean[${arguments.size}];") + argIsNulls + } else { + "" + } val argValues = arguments.zipWithIndex.map { case (e, i) => val argValue = ctx.freshName("argValue") ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") @@ -54,14 +60,19 @@ trait InvokeLike extends Expression { val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) - expr.code + s""" - $argIsNulls[$i] = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; - """ + expr.code + + (if (propagateNull && e.nullable) { + s""" + $argIsNulls[$i] = ${expr.isNull}; + ${argValues(i)} = ${expr.value}; + """ + } else { + s"${argValues(i)} = ${expr.value};" + }) } val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) - val setIsNull = if (propagateNull && arguments.nonEmpty) { + val setIsNull = if (propagateNull && arguments.exists(_.nullable)) { s""" for (int idx = 0; idx < ${arguments.length}; idx++) { if ($argIsNulls[idx]) { ${ev.isNull} = true; break; } @@ -259,7 +270,7 @@ case class NewInstance( outerPointer: Option[() => AnyRef]) extends Expression with InvokeLike with NonSQLExpression { private val className = cls.getName - override def nullable: Boolean = propagateNull + override def nullable: Boolean = propagateNull && arguments.exists(_.nullable) override def children: Seq[Expression] = arguments @@ -284,7 +295,7 @@ case class NewInstance( val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) var isNull = ev.isNull - val prepareIsNull = if (propagateNull && arguments.nonEmpty) { + val prepareIsNull = if (propagateNull && arguments.exists(_.nullable)) { s"boolean $isNull = false;" } else { isNull = "false" @@ -302,8 +313,12 @@ case class NewInstance( ${outer.map(_.code).getOrElse("")} $prepareIsNull $setIsNull - final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; - """ + """ + + (if (propagateNull && arguments.exists(_.nullable)) { + s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" + } else { + s"final $javaType ${ev.value} = $constructorCall;" + }) ev.copy(code = code, isNull = isNull) } From 2f30f53cf3c9244ba2171a9300e2427bf50e4d7f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 16 Nov 2016 12:42:42 +0900 Subject: [PATCH 03/29] Modify to short circuit if arguments have null when propageteNull == true. --- .../expressions/objects/objects.scala | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index c4d30919c223b..0ba67c0b40ba4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -43,12 +43,11 @@ trait InvokeLike extends Expression { def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val argIsNulls = + val argsHaveNull = if (propagateNull && arguments.exists(_.nullable)) { - val argIsNulls = ctx.freshName("argIsNulls") - ctx.addMutableState("boolean[]", argIsNulls, - s"$argIsNulls = new boolean[${arguments.size}];") - argIsNulls + val argsHaveNull = ctx.freshName("argsHaveNull") + ctx.addMutableState("boolean", argsHaveNull, "") + argsHaveNull } else { "" } @@ -58,26 +57,40 @@ trait InvokeLike extends Expression { argValue } - val argCodes = arguments.zipWithIndex.map { case (e, i) => - val expr = e.genCode(ctx) - expr.code + - (if (propagateNull && e.nullable) { + val argCodes = + if (propagateNull && arguments.exists(_.nullable)) { + s"$argsHaveNull = false;" +: + arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) s""" - $argIsNulls[$i] = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; + if (!$argsHaveNull) { + ${expr.code} + """ + + (if (e.nullable) { + s""" + $argsHaveNull = ${expr.isNull}; + ${argValues(i)} = ${expr.value}; + """ + } else { + s"${argValues(i)} = ${expr.value};" + }) + """ - } else { - s"${argValues(i)} = ${expr.value};" - }) - } + } + """ + } + } else { + arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + s""" + ${expr.code} + ${argValues(i)} = ${expr.value}; + """ + } + } val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) val setIsNull = if (propagateNull && arguments.exists(_.nullable)) { - s""" - for (int idx = 0; idx < ${arguments.length}; idx++) { - if ($argIsNulls[idx]) { ${ev.isNull} = true; break; } - } - """ + s"${ev.isNull} = ${ev.isNull} || $argsHaveNull;" } else { "" } From 9ac3f2836cb460bcfd0329ffbdc9cd548943a60d Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 12:19:00 +0900 Subject: [PATCH 04/29] Add a comment for `prepareArguments()`. --- .../sql/catalyst/expressions/objects/objects.scala | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 0ba67c0b40ba4..1b6eac073d8b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -41,6 +41,20 @@ trait InvokeLike extends Expression { def propagateNull: Boolean + /** + * Prepares codes for arguments. + * + * - generate codes for argument. + * - use ctx.splitExpressions() not to exceed 64kb JVM limit while preparing arguments. + * - avoid some of nullabilty checking which are not needed because the expression is not + * nullable. + * - when progagateNull == true, short circuit if we found one of arguments is null because + * preparing rest of arguments can be skipped in the case. + * + * @param ctx a [[CodegenContext]] + * @param ev an [[ExprCode]] with unique terms. + * @return (code to prepare arguments, argument string, code to set isNull) + */ def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { val argsHaveNull = From 5ad6966d7fd90e126884e72ba4e5c4f7c99be868 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 12:56:08 +0900 Subject: [PATCH 05/29] Define a variable for `propagateNull && arguments.exists(_.nullable)`. --- .../expressions/objects/objects.scala | 75 ++++++++++--------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 1b6eac073d8b5..a4ad013d7cac5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -41,6 +41,8 @@ trait InvokeLike extends Expression { def propagateNull: Boolean + protected lazy val propagatingNull: Boolean = propagateNull && arguments.exists(_.nullable) + /** * Prepares codes for arguments. * @@ -57,53 +59,52 @@ trait InvokeLike extends Expression { */ def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val argsHaveNull = - if (propagateNull && arguments.exists(_.nullable)) { - val argsHaveNull = ctx.freshName("argsHaveNull") - ctx.addMutableState("boolean", argsHaveNull, "") - argsHaveNull - } else { - "" - } + val argsHaveNull = if (propagatingNull) { + val argsHaveNull = ctx.freshName("argsHaveNull") + ctx.addMutableState("boolean", argsHaveNull, "") + argsHaveNull + } else { + "" + } val argValues = arguments.zipWithIndex.map { case (e, i) => val argValue = ctx.freshName("argValue") ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") argValue } - val argCodes = - if (propagateNull && arguments.exists(_.nullable)) { - s"$argsHaveNull = false;" +: - arguments.zipWithIndex.map { case (e, i) => - val expr = e.genCode(ctx) + val argCodes = if (propagatingNull) { + val reset = s"$argsHaveNull = false;" + val argCodes = arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + s""" + if (!$argsHaveNull) { + ${expr.code} + """ + + (if (e.nullable) { s""" - if (!$argsHaveNull) { - ${expr.code} - """ + - (if (e.nullable) { - s""" - $argsHaveNull = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; - """ - } else { - s"${argValues(i)} = ${expr.value};" - }) + - """ - } + $argsHaveNull = ${expr.isNull}; + ${argValues(i)} = ${expr.value}; """ + } else { + s"${argValues(i)} = ${expr.value};" + }) + + """ } - } else { - arguments.zipWithIndex.map { case (e, i) => - val expr = e.genCode(ctx) - s""" - ${expr.code} - ${argValues(i)} = ${expr.value}; - """ - } + """ } + reset +: argCodes + } else { + arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + s""" + ${expr.code} + ${argValues(i)} = ${expr.value}; + """ + } + } val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) - val setIsNull = if (propagateNull && arguments.exists(_.nullable)) { + val setIsNull = if (propagatingNull) { s"${ev.isNull} = ${ev.isNull} || $argsHaveNull;" } else { "" @@ -297,7 +298,7 @@ case class NewInstance( outerPointer: Option[() => AnyRef]) extends Expression with InvokeLike with NonSQLExpression { private val className = cls.getName - override def nullable: Boolean = propagateNull && arguments.exists(_.nullable) + override def nullable: Boolean = propagatingNull override def children: Seq[Expression] = arguments @@ -341,7 +342,7 @@ case class NewInstance( $prepareIsNull $setIsNull """ + - (if (propagateNull && arguments.exists(_.nullable)) { + (if (propagatingNull) { s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" } else { s"final $javaType ${ev.value} = $constructorCall;" From a0ac177eaef56af1f392235705dbf1540d48e820 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 12:57:06 +0900 Subject: [PATCH 06/29] Add a missing param comment for `propagateNull`. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index a4ad013d7cac5..3eeb153e11a98 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -180,6 +180,8 @@ case class StaticInvoke( * @param functionName The name of the method to call. * @param dataType The expected return type of the function. * @param arguments An optional list of expressions, whos evaluation will be passed to the function. + * @param propagateNull When true, and any of the arguments is null, null will be returned instead + * of calling the function. */ case class Invoke( targetObject: Expression, From 757d33e68be38a690edfb8509a767067c8cc6a6e Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 12:59:22 +0900 Subject: [PATCH 07/29] Rename variable from `argsHaveNull` to `containsNullInArguments`. --- .../catalyst/expressions/objects/objects.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 3eeb153e11a98..df9d15179ff62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -59,10 +59,10 @@ trait InvokeLike extends Expression { */ def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val argsHaveNull = if (propagatingNull) { - val argsHaveNull = ctx.freshName("argsHaveNull") - ctx.addMutableState("boolean", argsHaveNull, "") - argsHaveNull + val containsNullInArguments = if (propagatingNull) { + val containsNullInArguments = ctx.freshName("containsNullInArguments") + ctx.addMutableState("boolean", containsNullInArguments, "") + containsNullInArguments } else { "" } @@ -73,16 +73,16 @@ trait InvokeLike extends Expression { } val argCodes = if (propagatingNull) { - val reset = s"$argsHaveNull = false;" + val reset = s"$containsNullInArguments = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) s""" - if (!$argsHaveNull) { + if (!$containsNullInArguments) { ${expr.code} """ + (if (e.nullable) { s""" - $argsHaveNull = ${expr.isNull}; + $containsNullInArguments = ${expr.isNull}; ${argValues(i)} = ${expr.value}; """ } else { @@ -105,7 +105,7 @@ trait InvokeLike extends Expression { val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) val setIsNull = if (propagatingNull) { - s"${ev.isNull} = ${ev.isNull} || $argsHaveNull;" + s"${ev.isNull} = ${ev.isNull} || $containsNullInArguments;" } else { "" } From 240fde493856b7bfb0568b338cf30ba0a08408df Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 13:16:35 +0900 Subject: [PATCH 08/29] Modify `InvokeLike` not to extend `Expression` because no need to extend it. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index df9d15179ff62..d935d28f5f74f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.types._ /** * Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. */ -trait InvokeLike extends Expression { +trait InvokeLike { def arguments: Seq[Expression] From 6f6e0b34446628efb21d813e87801506f97f4f5e Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 14:17:52 +0900 Subject: [PATCH 09/29] Fix comments. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index d935d28f5f74f..126233c43482d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -47,10 +47,10 @@ trait InvokeLike { * Prepares codes for arguments. * * - generate codes for argument. - * - use ctx.splitExpressions() not to exceed 64kb JVM limit while preparing arguments. + * - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments. * - avoid some of nullabilty checking which are not needed because the expression is not * nullable. - * - when progagateNull == true, short circuit if we found one of arguments is null because + * - when progagatingNull == true, short circuit if we found one of arguments is null because * preparing rest of arguments can be skipped in the case. * * @param ctx a [[CodegenContext]] From 2bd6e502e5b1ab565659d3c45b97db96fa10457c Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 14:33:16 +0900 Subject: [PATCH 10/29] Modify for readability. --- .../catalyst/expressions/objects/objects.scala | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 126233c43482d..fd635d0e0b0b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -76,19 +76,16 @@ trait InvokeLike { val reset = s"$containsNullInArguments = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) + val updateContainsNull = if (e.nullable) { + s"$containsNullInArguments = ${expr.isNull};" + } else { + "" + } s""" if (!$containsNullInArguments) { ${expr.code} - """ + - (if (e.nullable) { - s""" - $containsNullInArguments = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; - """ - } else { - s"${argValues(i)} = ${expr.value};" - }) + - """ + $updateContainsNull + ${argValues(i)} = ${expr.value}; } """ } From bcb93db54a025a8d9f729f21f8279593e0fdf3af Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 14:33:53 +0900 Subject: [PATCH 11/29] Make `InvokeLike` extend `Expression` and `NonSQLExpression`. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index fd635d0e0b0b4..83c0b9d604741 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.types._ /** * Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. */ -trait InvokeLike { +trait InvokeLike extends Expression with NonSQLExpression { def arguments: Seq[Expression] @@ -129,7 +129,7 @@ case class StaticInvoke( dataType: DataType, functionName: String, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { + propagateNull: Boolean = true) extends InvokeLike { val objectName = staticObject.getName.stripSuffix("$") @@ -185,7 +185,7 @@ case class Invoke( functionName: String, dataType: DataType, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with InvokeLike with NonSQLExpression { + propagateNull: Boolean = true) extends InvokeLike { override def nullable: Boolean = true override def children: Seq[Expression] = targetObject +: arguments @@ -294,7 +294,7 @@ case class NewInstance( arguments: Seq[Expression], propagateNull: Boolean, dataType: DataType, - outerPointer: Option[() => AnyRef]) extends Expression with InvokeLike with NonSQLExpression { + outerPointer: Option[() => AnyRef]) extends InvokeLike { private val className = cls.getName override def nullable: Boolean = propagatingNull From d448b60c786efd1a002c17a6458a4b3b62669efc Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 14:34:41 +0900 Subject: [PATCH 12/29] Use `propagatingNull`. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 83c0b9d604741..d4fb5b2bd2fe6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -322,7 +322,7 @@ case class NewInstance( val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) var isNull = ev.isNull - val prepareIsNull = if (propagateNull && arguments.exists(_.nullable)) { + val prepareIsNull = if (propagatingNull) { s"boolean $isNull = false;" } else { isNull = "false" From 8894a962f81e0eb73669b4dc4ba59d395fdf6bd0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 14:43:13 +0900 Subject: [PATCH 13/29] Use `needNullCheck` instread of `propagatingNull`. --- .../catalyst/expressions/objects/objects.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index d4fb5b2bd2fe6..c674003b0dec6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -41,7 +41,7 @@ trait InvokeLike extends Expression with NonSQLExpression { def propagateNull: Boolean - protected lazy val propagatingNull: Boolean = propagateNull && arguments.exists(_.nullable) + protected lazy val needNullCheck: Boolean = propagateNull && arguments.exists(_.nullable) /** * Prepares codes for arguments. @@ -50,7 +50,7 @@ trait InvokeLike extends Expression with NonSQLExpression { * - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments. * - avoid some of nullabilty checking which are not needed because the expression is not * nullable. - * - when progagatingNull == true, short circuit if we found one of arguments is null because + * - when needNullCheck == true, short circuit if we found one of arguments is null because * preparing rest of arguments can be skipped in the case. * * @param ctx a [[CodegenContext]] @@ -59,7 +59,7 @@ trait InvokeLike extends Expression with NonSQLExpression { */ def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val containsNullInArguments = if (propagatingNull) { + val containsNullInArguments = if (needNullCheck) { val containsNullInArguments = ctx.freshName("containsNullInArguments") ctx.addMutableState("boolean", containsNullInArguments, "") containsNullInArguments @@ -72,7 +72,7 @@ trait InvokeLike extends Expression with NonSQLExpression { argValue } - val argCodes = if (propagatingNull) { + val argCodes = if (needNullCheck) { val reset = s"$containsNullInArguments = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) @@ -101,7 +101,7 @@ trait InvokeLike extends Expression with NonSQLExpression { } val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) - val setIsNull = if (propagatingNull) { + val setIsNull = if (needNullCheck) { s"${ev.isNull} = ${ev.isNull} || $containsNullInArguments;" } else { "" @@ -297,7 +297,7 @@ case class NewInstance( outerPointer: Option[() => AnyRef]) extends InvokeLike { private val className = cls.getName - override def nullable: Boolean = propagatingNull + override def nullable: Boolean = needNullCheck override def children: Seq[Expression] = arguments @@ -322,7 +322,7 @@ case class NewInstance( val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) var isNull = ev.isNull - val prepareIsNull = if (propagatingNull) { + val prepareIsNull = if (needNullCheck) { s"boolean $isNull = false;" } else { isNull = "false" @@ -341,7 +341,7 @@ case class NewInstance( $prepareIsNull $setIsNull """ + - (if (propagatingNull) { + (if (needNullCheck) { s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" } else { s"final $javaType ${ev.value} = $constructorCall;" From ca4558f078df717c0d73476aecf0e2079975c69a Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 15:48:37 +0900 Subject: [PATCH 14/29] Modify `prepareArguments()` to return the result of argument null check instead of the code to update `ev.isNull`. --- .../expressions/objects/objects.scala | 45 ++++++++----------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index c674003b0dec6..12eaad4a15b63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -55,16 +55,16 @@ trait InvokeLike extends Expression with NonSQLExpression { * * @param ctx a [[CodegenContext]] * @param ev an [[ExprCode]] with unique terms. - * @return (code to prepare arguments, argument string, code to set isNull) + * @return (code to prepare arguments, argument string, result of argument null check) */ def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { - val containsNullInArguments = if (needNullCheck) { - val containsNullInArguments = ctx.freshName("containsNullInArguments") - ctx.addMutableState("boolean", containsNullInArguments, "") - containsNullInArguments + val resultIsNull = if (needNullCheck) { + val resultIsNull = ctx.freshName("resultIsNull") + ctx.addMutableState("boolean", resultIsNull, "") + resultIsNull } else { - "" + "false" } val argValues = arguments.zipWithIndex.map { case (e, i) => val argValue = ctx.freshName("argValue") @@ -73,18 +73,18 @@ trait InvokeLike extends Expression with NonSQLExpression { } val argCodes = if (needNullCheck) { - val reset = s"$containsNullInArguments = false;" + val reset = s"$resultIsNull = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) - val updateContainsNull = if (e.nullable) { - s"$containsNullInArguments = ${expr.isNull};" + val updateResultIsNull = if (e.nullable) { + s"$resultIsNull = ${expr.isNull};" } else { "" } s""" - if (!$containsNullInArguments) { + if (!$resultIsNull) { ${expr.code} - $updateContainsNull + $updateResultIsNull ${argValues(i)} = ${expr.value}; } """ @@ -101,13 +101,7 @@ trait InvokeLike extends Expression with NonSQLExpression { } val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) - val setIsNull = if (needNullCheck) { - s"${ev.isNull} = ${ev.isNull} || $containsNullInArguments;" - } else { - "" - } - - (argCode, argValues.mkString(", "), setIsNull) + (argCode, argValues.mkString(", "), resultIsNull) } } @@ -142,7 +136,7 @@ case class StaticInvoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) val callFunc = s"$objectName.$functionName($argString)" @@ -156,8 +150,7 @@ case class StaticInvoke( val code = s""" $argCode - boolean ${ev.isNull} = false; - $setIsNull + boolean ${ev.isNull} = $resultIsNull; final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; $postNullCheck """ @@ -208,7 +201,7 @@ case class Invoke( val javaType = ctx.javaType(dataType) val obj = targetObject.genCode(ctx) - val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -250,8 +243,7 @@ case class Invoke( val code = s""" ${obj.code} $argCode - boolean ${ev.isNull} = ${obj.isNull}; - $setIsNull + boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { $evaluate @@ -317,13 +309,13 @@ case class NewInstance( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val (argCode, argString, setIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) var isNull = ev.isNull val prepareIsNull = if (needNullCheck) { - s"boolean $isNull = false;" + s"boolean $isNull = $resultIsNull;" } else { isNull = "false" "" @@ -339,7 +331,6 @@ case class NewInstance( $argCode ${outer.map(_.code).getOrElse("")} $prepareIsNull - $setIsNull """ + (if (needNullCheck) { s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" From 4d9a0372a2ded4629bf9657a515cd2e35be17351 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 18:20:09 +0900 Subject: [PATCH 15/29] Skip evaluate arguments if `obj.isNull == true` in `Invoke`. --- .../catalyst/expressions/objects/objects.scala | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 12eaad4a15b63..831995174c690 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -203,6 +203,17 @@ case class Invoke( val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) + val evaluateArguments = if (arguments.nonEmpty) { + s""" + if (!${ev.isNull}) { + $argCode + ${ev.isNull} = $resultIsNull; + } + """ + } else { + "" + } + val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -240,10 +251,11 @@ case class Invoke( } else { "" } + val code = s""" ${obj.code} - $argCode - boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; + boolean ${ev.isNull} = ${obj.isNull}; + $evaluateArguments $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { $evaluate From ebb1241468119a2ebf294111cfe6975fa3b25c59 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 18:54:35 +0900 Subject: [PATCH 16/29] Modify to prepare for further optimization. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 831995174c690..0fd3edeedf2dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -205,9 +205,8 @@ case class Invoke( val evaluateArguments = if (arguments.nonEmpty) { s""" - if (!${ev.isNull}) { + if (!${obj.isNull}) { $argCode - ${ev.isNull} = $resultIsNull; } """ } else { @@ -254,8 +253,8 @@ case class Invoke( val code = s""" ${obj.code} - boolean ${ev.isNull} = ${obj.isNull}; $evaluateArguments + boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { $evaluate From 99a59b24500c338aef22aa43f41259365edb10ba Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 20:54:53 +0900 Subject: [PATCH 17/29] Revert a part of last 2 commits. --- .../sql/catalyst/expressions/objects/objects.scala | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 0fd3edeedf2dc..417b56ed97c1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -203,16 +203,6 @@ case class Invoke( val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) - val evaluateArguments = if (arguments.nonEmpty) { - s""" - if (!${obj.isNull}) { - $argCode - } - """ - } else { - "" - } - val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -253,7 +243,9 @@ case class Invoke( val code = s""" ${obj.code} - $evaluateArguments + if (!${obj.isNull}) { + $argCode + } boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { From 243888aeae61ce7d0dbc24a9604c4818113ac119 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 21:23:00 +0900 Subject: [PATCH 18/29] Revert "Revert a part of last 2 commits." This reverts commit 99a59b24500c338aef22aa43f41259365edb10ba. --- .../sql/catalyst/expressions/objects/objects.scala | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 417b56ed97c1f..0fd3edeedf2dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -203,6 +203,16 @@ case class Invoke( val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) + val evaluateArguments = if (arguments.nonEmpty) { + s""" + if (!${obj.isNull}) { + $argCode + } + """ + } else { + "" + } + val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -243,9 +253,7 @@ case class Invoke( val code = s""" ${obj.code} - if (!${obj.isNull}) { - $argCode - } + $evaluateArguments boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { From e12a9bd45d987359795cfb8c4bc003bb29ae9372 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 21:24:15 +0900 Subject: [PATCH 19/29] Revert "Modify to prepare for further optimization." This reverts commit ebb1241468119a2ebf294111cfe6975fa3b25c59. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 0fd3edeedf2dc..831995174c690 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -205,8 +205,9 @@ case class Invoke( val evaluateArguments = if (arguments.nonEmpty) { s""" - if (!${obj.isNull}) { + if (!${ev.isNull}) { $argCode + ${ev.isNull} = $resultIsNull; } """ } else { @@ -253,8 +254,8 @@ case class Invoke( val code = s""" ${obj.code} + boolean ${ev.isNull} = ${obj.isNull}; $evaluateArguments - boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { $evaluate From 2c52d9102150ce17c7aa00dae13016c83de77b56 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 21:27:41 +0900 Subject: [PATCH 20/29] Use `obj.isNull` instead of `ev.isNull`. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 831995174c690..a89af560ca0c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -205,7 +205,7 @@ case class Invoke( val evaluateArguments = if (arguments.nonEmpty) { s""" - if (!${ev.isNull}) { + if (!${obj.isNull}) { $argCode ${ev.isNull} = $resultIsNull; } From 43d26932ac2058b54854a4e28c13567ac79298f5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 21:39:47 +0900 Subject: [PATCH 21/29] Revert and use a simple case. --- .../catalyst/expressions/objects/objects.scala | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index a89af560ca0c6..417b56ed97c1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -203,17 +203,6 @@ case class Invoke( val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) - val evaluateArguments = if (arguments.nonEmpty) { - s""" - if (!${obj.isNull}) { - $argCode - ${ev.isNull} = $resultIsNull; - } - """ - } else { - "" - } - val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -254,8 +243,10 @@ case class Invoke( val code = s""" ${obj.code} - boolean ${ev.isNull} = ${obj.isNull}; - $evaluateArguments + if (!${obj.isNull}) { + $argCode + } + boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { $evaluate From bd9c09f2e22987d799d4d5560cb65904b2b6d480 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 18 Nov 2016 16:00:15 +0900 Subject: [PATCH 22/29] Remove unused argument. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 417b56ed97c1f..a577e62ee237c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -54,10 +54,9 @@ trait InvokeLike extends Expression with NonSQLExpression { * preparing rest of arguments can be skipped in the case. * * @param ctx a [[CodegenContext]] - * @param ev an [[ExprCode]] with unique terms. * @return (code to prepare arguments, argument string, result of argument null check) */ - def prepareArguments(ctx: CodegenContext, ev: ExprCode): (String, String, String) = { + def prepareArguments(ctx: CodegenContext): (String, String, String) = { val resultIsNull = if (needNullCheck) { val resultIsNull = ctx.freshName("resultIsNull") @@ -136,7 +135,7 @@ case class StaticInvoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx) val callFunc = s"$objectName.$functionName($argString)" @@ -201,7 +200,7 @@ case class Invoke( val javaType = ctx.javaType(dataType) val obj = targetObject.genCode(ctx) - val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx) val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -312,7 +311,7 @@ case class NewInstance( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val (argCode, argString, resultIsNull) = prepareArguments(ctx, ev) + val (argCode, argString, resultIsNull) = prepareArguments(ctx) val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) From 501095f95f23748c15c8506c548655467824a12e Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 18 Nov 2016 16:03:44 +0900 Subject: [PATCH 23/29] Remove unneeded if. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index a577e62ee237c..40a3038c5b780 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -333,12 +333,8 @@ case class NewInstance( $argCode ${outer.map(_.code).getOrElse("")} $prepareIsNull - """ + - (if (needNullCheck) { - s"final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall;" - } else { - s"final $javaType ${ev.value} = $constructorCall;" - }) + final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; + """ ev.copy(code = code, isNull = isNull) } From 1baac557bb542cbb5195478a7e440fd03cf9a222 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 18 Nov 2016 16:25:20 +0900 Subject: [PATCH 24/29] Modify generated code of `Invoke`. --- .../sql/catalyst/expressions/objects/objects.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 40a3038c5b780..9d141d6da6eae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -242,15 +242,16 @@ case class Invoke( val code = s""" ${obj.code} + boolean ${ev.isNull} = true; + $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${obj.isNull}) { $argCode + ${ev.isNull} = $resultIsNull; + if (!${ev.isNull}) { + $evaluate + } + $postNullCheck } - boolean ${ev.isNull} = ${obj.isNull} || $resultIsNull; - $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { - $evaluate - } - $postNullCheck """ ev.copy(code = code) } From 831c521fc63b8e35cf05b45e5e11f73041613cf8 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 19 Nov 2016 00:31:24 +0900 Subject: [PATCH 25/29] Use resultIsNull as ev.isNull in `NewInstance`. --- .../sql/catalyst/expressions/objects/objects.scala | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 9d141d6da6eae..655b2bd366bc3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -316,13 +316,7 @@ case class NewInstance( val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) - var isNull = ev.isNull - val prepareIsNull = if (needNullCheck) { - s"boolean $isNull = $resultIsNull;" - } else { - isNull = "false" - "" - } + ev.isNull = resultIsNull val constructorCall = outer.map { gen => s"${gen.value}.new ${cls.getSimpleName}($argString)" @@ -333,10 +327,9 @@ case class NewInstance( val code = s""" $argCode ${outer.map(_.code).getOrElse("")} - $prepareIsNull - final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; + final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(javaType)} : $constructorCall; """ - ev.copy(code = code, isNull = isNull) + ev.copy(code = code) } override def toString: String = s"newInstance($cls)" From 0b210f80cc548489cc3fdac6bd27805425e8a392 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 19 Nov 2016 00:42:05 +0900 Subject: [PATCH 26/29] Remove unneeded zipWithIndex. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 655b2bd366bc3..815c934114875 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -65,7 +65,7 @@ trait InvokeLike extends Expression with NonSQLExpression { } else { "false" } - val argValues = arguments.zipWithIndex.map { case (e, i) => + val argValues = arguments.map { e => val argValue = ctx.freshName("argValue") ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") argValue From f8acda669bc3b10ca0c2889a6bd137e77d98f552 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 19 Nov 2016 01:05:18 +0900 Subject: [PATCH 27/29] Use local variables for `resultIsNull`s and `argValue`s. --- .../catalyst/expressions/objects/objects.scala | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 815c934114875..d6b677ba8ada3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -59,20 +59,16 @@ trait InvokeLike extends Expression with NonSQLExpression { def prepareArguments(ctx: CodegenContext): (String, String, String) = { val resultIsNull = if (needNullCheck) { - val resultIsNull = ctx.freshName("resultIsNull") - ctx.addMutableState("boolean", resultIsNull, "") - resultIsNull + ctx.freshName("resultIsNull") } else { "false" } val argValues = arguments.map { e => - val argValue = ctx.freshName("argValue") - ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") - argValue + ctx.freshName("argValue") } val argCodes = if (needNullCheck) { - val reset = s"$resultIsNull = false;" + val reset = s"boolean $resultIsNull = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) val updateResultIsNull = if (e.nullable) { @@ -81,6 +77,7 @@ trait InvokeLike extends Expression with NonSQLExpression { "" } s""" + ${ctx.javaType(e.dataType)} ${argValues(i)} = ${ctx.defaultValue(e.dataType)}; if (!$resultIsNull) { ${expr.code} $updateResultIsNull @@ -94,11 +91,11 @@ trait InvokeLike extends Expression with NonSQLExpression { val expr = e.genCode(ctx) s""" ${expr.code} - ${argValues(i)} = ${expr.value}; + ${ctx.javaType(e.dataType)} ${argValues(i)} = ${expr.value}; """ } } - val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) + val argCode = argCodes.mkString("\n") (argCode, argValues.mkString(", "), resultIsNull) } From fe2871c8eb13f9c3354b7076d5eb858c4d80ece5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 19 Nov 2016 08:55:12 +0900 Subject: [PATCH 28/29] Revert "Use local variables for `resultIsNull`s and `argValue`s." This reverts commit f8acda669bc3b10ca0c2889a6bd137e77d98f552. --- .../catalyst/expressions/objects/objects.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index d6b677ba8ada3..815c934114875 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -59,16 +59,20 @@ trait InvokeLike extends Expression with NonSQLExpression { def prepareArguments(ctx: CodegenContext): (String, String, String) = { val resultIsNull = if (needNullCheck) { - ctx.freshName("resultIsNull") + val resultIsNull = ctx.freshName("resultIsNull") + ctx.addMutableState("boolean", resultIsNull, "") + resultIsNull } else { "false" } val argValues = arguments.map { e => - ctx.freshName("argValue") + val argValue = ctx.freshName("argValue") + ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") + argValue } val argCodes = if (needNullCheck) { - val reset = s"boolean $resultIsNull = false;" + val reset = s"$resultIsNull = false;" val argCodes = arguments.zipWithIndex.map { case (e, i) => val expr = e.genCode(ctx) val updateResultIsNull = if (e.nullable) { @@ -77,7 +81,6 @@ trait InvokeLike extends Expression with NonSQLExpression { "" } s""" - ${ctx.javaType(e.dataType)} ${argValues(i)} = ${ctx.defaultValue(e.dataType)}; if (!$resultIsNull) { ${expr.code} $updateResultIsNull @@ -91,11 +94,11 @@ trait InvokeLike extends Expression with NonSQLExpression { val expr = e.genCode(ctx) s""" ${expr.code} - ${ctx.javaType(e.dataType)} ${argValues(i)} = ${expr.value}; + ${argValues(i)} = ${expr.value}; """ } } - val argCode = argCodes.mkString("\n") + val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) (argCode, argValues.mkString(", "), resultIsNull) } From c88a1ff1b6eeed41dd15e0215070341ca9b7c821 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sat, 19 Nov 2016 12:31:58 +0900 Subject: [PATCH 29/29] Optimize a code in `StaticInvoke`. --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 815c934114875..7b71bfbdd3cd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -150,7 +150,7 @@ case class StaticInvoke( val code = s""" $argCode boolean ${ev.isNull} = $resultIsNull; - final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; + final $javaType ${ev.value} = $resultIsNull ? ${ctx.defaultValue(dataType)} : $callFunc; $postNullCheck """ ev.copy(code = code)