From b287ded240e27b0db99ee9754f09bf9be121c28f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 10 Nov 2016 18:11:35 +0900 Subject: [PATCH 01/10] Fix nullabilities of MapObjects and optimize not to check null if lambda is not nullable. --- .../expressions/objects/objects.scala | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 50e2ac3c36d93..7c4985f341e03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -422,14 +422,15 @@ case class MapObjects private( lambdaFunction: Expression, inputData: Expression) extends Expression with NonSQLExpression { - override def nullable: Boolean = true + override def nullable: Boolean = inputData.nullable override def children: Seq[Expression] = lambdaFunction :: inputData :: Nil override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") - override def dataType: DataType = ArrayType(lambdaFunction.dataType) + override def dataType: DataType = + ArrayType(lambdaFunction.dataType, containsNull = lambdaFunction.nullable) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val elementJavaType = ctx.javaType(loopVarDataType) @@ -512,6 +513,18 @@ case class MapObjects private( case _ => s"$loopIsNull = $loopValue == null;" } + val setValue = if (lambdaFunction.nullable) { + s""" + if (${genFunction.isNull}) { + $convertedArray[$loopIndex] = null; + } else { + $convertedArray[$loopIndex] = $genFunctionValue; + } + """ + } else { + s"$convertedArray[$loopIndex] = $genFunctionValue;" + } + val code = s""" ${genInputData.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; @@ -528,11 +541,7 @@ case class MapObjects private( $loopNullCheck ${genFunction.code} - if (${genFunction.isNull}) { - $convertedArray[$loopIndex] = null; - } else { - $convertedArray[$loopIndex] = $genFunctionValue; - } + $setValue $loopIndex += 1; } From deffccdf2ef314acf3de94c4fbd33655e65e24e2 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 11 Nov 2016 13:17:24 +0900 Subject: [PATCH 02/10] Use `ctx.nullSafeExec()` to generate nullability checking in `MapObjects`. --- .../expressions/objects/objects.scala | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 7c4985f341e03..cf0e5638777e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -513,23 +513,11 @@ case class MapObjects private( case _ => s"$loopIsNull = $loopValue == null;" } - val setValue = if (lambdaFunction.nullable) { - s""" - if (${genFunction.isNull}) { - $convertedArray[$loopIndex] = null; - } else { - $convertedArray[$loopIndex] = $genFunctionValue; - } - """ - } else { - s"$convertedArray[$loopIndex] = $genFunctionValue;" - } - val code = s""" ${genInputData.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - - if (!${genInputData.isNull}) { + """ + + ctx.nullSafeExec(inputData.nullable, genInputData.isNull)(s""" $determineCollectionType $convertedType[] $convertedArray = null; int $dataLength = $getLength; @@ -541,14 +529,15 @@ case class MapObjects private( $loopNullCheck ${genFunction.code} - $setValue - + """ + + ctx.nullSafeExec(lambdaFunction.nullable, genFunction.isNull)(s""" + $convertedArray[$loopIndex] = $genFunctionValue; + """) + s""" $loopIndex += 1; } ${ev.value} = new ${classOf[GenericArrayData].getName}($convertedArray); - } - """ + """) ev.copy(code = code, isNull = genInputData.isNull) } } From 0fc36a80f546a3ccfe97c70f0a94241208b9ed39 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 14 Nov 2016 16:43:08 +0900 Subject: [PATCH 03/10] Use `ctx.nullSafeExec()` to generate nullability checking. --- .../sql/catalyst/expressions/SortOrder.scala | 14 ++-- .../expressions/datetimeExpressions.scala | 70 ++++++++--------- .../expressions/objects/objects.scala | 75 +++++++++---------- 3 files changed, 78 insertions(+), 81 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 3bebd552ef51a..3ea05dc1f5b96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -142,14 +142,12 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { case _ => "0L" } - ev.copy(code = childCode.code + - s""" - |long ${ev.value} = 0L; - |boolean ${ev.isNull} = ${childCode.isNull}; - |if (!${childCode.isNull}) { - | ${ev.value} = $prefixCode; - |} - """.stripMargin) + ev.copy(code = childCode.code + s""" + long ${ev.value} = 0L; + """ + + ctx.nullSafeExec(child.child.nullable, childCode.isNull)(s""" + ${ev.value} = $prefixCode; + """), isNull = childCode.isNull) } override def dataType: DataType = LongType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9cec6be841de0..891c9e4f3c50f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -503,16 +503,17 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val eval1 = left.genCode(ctx) ev.copy(code = s""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + """ + + ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" try { ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L; } catch (java.text.ParseException e) { ${ev.isNull} = true; } - }""") + """)) } case StringType => val sdf = classOf[SimpleDateFormat].getName @@ -531,22 +532,22 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { case TimestampType => val eval1 = left.genCode(ctx) ev.copy(code = s""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { + ${eval1.code} + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + """ + + ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" ${ev.value} = ${eval1.value} / 1000000L; - }""") + """), isNull = eval1.isNull) case DateType => val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val eval1 = left.genCode(ctx) ev.copy(code = s""" ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { + """ + + ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" ${ev.value} = $dtu.daysToMillis(${eval1.value}) / 1000L; - }""") + """), isNull = eval1.isNull) } } } @@ -620,17 +621,18 @@ case class FromUnixTime(sec: Expression, format: Expression) val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val t = left.genCode(ctx) ev.copy(code = s""" - ${t.code} - boolean ${ev.isNull} = ${t.isNull}; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + """ + + ctx.nullSafeExec(left.nullable, t.isNull)(s""" try { ${ev.value} = UTF8String.fromString($formatterName.format( new java.util.Date(${t.value} * 1000L))); } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; } - }""") + """)) } } else { nullSafeCodeGen(ctx, ev, (seconds, f) => { @@ -815,13 +817,12 @@ case class FromUTCTimestamp(left: Expression, right: Expression) ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" - |${eval.code} - |boolean ${ev.isNull} = ${eval.isNull}; - |long ${ev.value} = 0; - |if (!${ev.isNull}) { - | ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm); - |} - """.stripMargin) + ${eval.code} + long ${ev.value} = 0; + """ + + ctx.nullSafeExec(left.nullable, eval.isNull)(s""" + ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm); + """), isNull = eval.isNull) } } else { defineCodeGen(ctx, ev, (timestamp, format) => { @@ -974,13 +975,12 @@ case class ToUTCTimestamp(left: Expression, right: Expression) ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" - |${eval.code} - |boolean ${ev.isNull} = ${eval.isNull}; - |long ${ev.value} = 0; - |if (!${ev.isNull}) { - | ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm); - |} - """.stripMargin) + ${eval.code} + long ${ev.value} = 0; + """ + + ctx.nullSafeExec(left.nullable, eval.isNull)(s""" + ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm); + """), isNull = eval.isNull) } } else { defineCodeGen(ctx, ev, (timestamp, format) => { @@ -1075,11 +1075,11 @@ case class TruncDate(date: Expression, format: Expression) val d = date.genCode(ctx) ev.copy(code = s""" ${d.code} - boolean ${ev.isNull} = ${d.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { + """ + + ctx.nullSafeExec(date.nullable, d.isNull)(s""" ${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); - }""") + """), isNull = d.isNull) } } else { nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index cf0e5638777e3..ba034adc362c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -664,7 +664,8 @@ case class ExternalMapToCatalyst private( s""" ${inputMap.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${inputMap.isNull}) { + """ + + ctx.nullSafeExec(child.nullable, inputMap.isNull)(s""" final int $length = ${inputMap.value}.size(); final Object[] $convertedKeys = new Object[$length]; final Object[] $convertedValues = new Object[$length]; @@ -682,18 +683,15 @@ case class ExternalMapToCatalyst private( } ${genValueConverter.code} - if (${genValueConverter.isNull}) { - $convertedValues[$index] = null; - } else { - $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; - } - + """ + + ctx.nullSafeExec(valueConverter.nullable, genValueConverter.isNull)(s""" + $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; + """) + s""" $index++; } ${ev.value} = new $mapCls(new $arrayCls($convertedKeys), new $arrayCls($convertedValues)); - } - """ + """) ev.copy(code = code, isNull = inputMap.isNull) } } @@ -721,13 +719,10 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType) val childrenCodes = children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - } - """ + eval.code + + ctx.nullSafeExec(e.nullable, eval.isNull)(s""" + $values[$i] = ${eval.value}; + """) } val childrenCode = ctx.splitExpressions(ctx.INPUT_ROW, childrenCodes) @@ -769,10 +764,10 @@ case class EncodeUsingSerializer(child: Expression, kryo: Boolean) val serializerInit = s""" if ($env == null) { $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ + } else { + $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + } + """ ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) // Code to serialize. @@ -782,8 +777,11 @@ case class EncodeUsingSerializer(child: Expression, kryo: Boolean) val code = s""" ${input.code} - final $javaType ${ev.value} = ${input.isNull} ? ${ctx.defaultValue(javaType)} : $serialize; - """ + final $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + """ + + ctx.nullSafeExec(child.nullable, input.isNull)(s""" + ${ev.value} = $serialize; + """) ev.copy(code = code, isNull = input.isNull) } @@ -815,10 +813,10 @@ case class DecodeUsingSerializer[T](child: Expression, tag: ClassTag[T], kryo: B val serializerInit = s""" if ($env == null) { $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ + } else { + $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + } + """ ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) // Code to deserialize. @@ -829,8 +827,11 @@ case class DecodeUsingSerializer[T](child: Expression, tag: ClassTag[T], kryo: B val code = s""" ${input.code} - final $javaType ${ev.value} = ${input.isNull} ? ${ctx.defaultValue(javaType)} : $deserialize; - """ + final $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + """ + + ctx.nullSafeExec(child.nullable, input.isNull)(s""" + ${ev.value} = $deserialize; + """) ev.copy(code = code, isNull = input.isNull) } @@ -862,12 +863,11 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp """ } - val code = s""" - ${instanceGen.code} - if (!${instanceGen.isNull}) { - ${initialize.mkString("\n")} - } - """ + val code = + instanceGen.code + + ctx.nullSafeExec(beanInstance.nullable, instanceGen.isNull)(s""" + ${initialize.mkString("\n")} + """) ev.copy(code = code, isNull = instanceGen.isNull, value = instanceGen.value) } } @@ -1000,15 +1000,14 @@ case class ValidateExternalType(child: Expression, expected: DataType) val code = s""" ${input.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${input.isNull}) { + """ + + ctx.nullSafeExec(child.nullable, input.isNull)(s""" if ($typeCheck) { ${ev.value} = (${ctx.boxedType(dataType)}) $obj; } else { throw new RuntimeException($obj.getClass().getName() + $errMsgField); } - } - - """ + """) ev.copy(code = code, isNull = input.isNull) } } From 658c5e39bcae9eab1c39a59ceb9533c7c30b8ccd Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 15 Nov 2016 02:45:20 +0900 Subject: [PATCH 04/10] Fix nullability of map values of `ExternalMapToCatalyst`. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index ba034adc362c5..8f86fe2285e21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -601,7 +601,8 @@ case class ExternalMapToCatalyst private( override def foldable: Boolean = false - override def dataType: MapType = MapType(keyConverter.dataType, valueConverter.dataType) + override def dataType: MapType = MapType( + keyConverter.dataType, valueConverter.dataType, valueContainsNull = valueConverter.nullable) override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") From ec0c55c73c080f887c0914de7601698dc1c82c57 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 15 Nov 2016 03:59:08 +0900 Subject: [PATCH 05/10] Use `ctx.nullSafeExec()` to generate nullability checking in complex type creators. --- .../expressions/complexTypeCreator.scala | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index c9f36649ec8ee..d556c9a0e726d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -67,13 +67,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression { ctx.INPUT_ROW, children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { + eval.code + + ctx.nullSafeExec(e.nullable, eval.isNull)(s""" $values[$i] = ${eval.value}; - } - """ + """) }) + s""" final ArrayData ${ev.value} = new $arrayClass($values); @@ -164,14 +161,10 @@ case class CreateMap(children: Seq[Expression]) extends Expression { ctx.INPUT_ROW, values.zipWithIndex.map { case (value, i) => val eval = value.genCode(ctx) - s""" - ${eval.code} - if (${eval.isNull}) { - $valueArray[$i] = null; - } else { + eval.code + + ctx.nullSafeExec(value.nullable, eval.isNull)(s""" $valueArray[$i] = ${eval.value}; - } - """ + """) }) + s""" final MapData ${ev.value} = new $mapClass($keyData, $valueData); @@ -307,12 +300,10 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc ctx.INPUT_ROW, valExprs.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - }""" + eval.code + + ctx.nullSafeExec(e.nullable, eval.isNull)(s""" + $values[$i] = ${eval.value}; + """) }) + s""" final InternalRow ${ev.value} = new $rowClass($values); From 727f5a99da8963972288f0a2e9776b2596ebc3ff Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 17:14:56 +0900 Subject: [PATCH 06/10] Revert "Use `ctx.nullSafeExec()` to generate nullability checking in complex type creators." This reverts commit ec0c55c73c080f887c0914de7601698dc1c82c57. --- .../expressions/complexTypeCreator.scala | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index d556c9a0e726d..c9f36649ec8ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -67,10 +67,13 @@ case class CreateArray(children: Seq[Expression]) extends Expression { ctx.INPUT_ROW, children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + - ctx.nullSafeExec(e.nullable, eval.isNull)(s""" + eval.code + s""" + if (${eval.isNull}) { + $values[$i] = null; + } else { $values[$i] = ${eval.value}; - """) + } + """ }) + s""" final ArrayData ${ev.value} = new $arrayClass($values); @@ -161,10 +164,14 @@ case class CreateMap(children: Seq[Expression]) extends Expression { ctx.INPUT_ROW, values.zipWithIndex.map { case (value, i) => val eval = value.genCode(ctx) - eval.code + - ctx.nullSafeExec(value.nullable, eval.isNull)(s""" + s""" + ${eval.code} + if (${eval.isNull}) { + $valueArray[$i] = null; + } else { $valueArray[$i] = ${eval.value}; - """) + } + """ }) + s""" final MapData ${ev.value} = new $mapClass($keyData, $valueData); @@ -300,10 +307,12 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc ctx.INPUT_ROW, valExprs.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + - ctx.nullSafeExec(e.nullable, eval.isNull)(s""" - $values[$i] = ${eval.value}; - """) + eval.code + s""" + if (${eval.isNull}) { + $values[$i] = null; + } else { + $values[$i] = ${eval.value}; + }""" }) + s""" final InternalRow ${ev.value} = new $rowClass($values); From dee450efbf5b643ecfbb1875f7cba87c39056fc6 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 17:15:36 +0900 Subject: [PATCH 07/10] Revert "Use `ctx.nullSafeExec()` to generate nullability checking." This reverts commit 0fc36a80f546a3ccfe97c70f0a94241208b9ed39. --- .../sql/catalyst/expressions/SortOrder.scala | 14 ++-- .../expressions/datetimeExpressions.scala | 70 ++++++++--------- .../expressions/objects/objects.scala | 75 ++++++++++--------- 3 files changed, 81 insertions(+), 78 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 3ea05dc1f5b96..3bebd552ef51a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -142,12 +142,14 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { case _ => "0L" } - ev.copy(code = childCode.code + s""" - long ${ev.value} = 0L; - """ + - ctx.nullSafeExec(child.child.nullable, childCode.isNull)(s""" - ${ev.value} = $prefixCode; - """), isNull = childCode.isNull) + ev.copy(code = childCode.code + + s""" + |long ${ev.value} = 0L; + |boolean ${ev.isNull} = ${childCode.isNull}; + |if (!${childCode.isNull}) { + | ${ev.value} = $prefixCode; + |} + """.stripMargin) } override def dataType: DataType = LongType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 891c9e4f3c50f..9cec6be841de0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -503,17 +503,16 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val eval1 = left.genCode(ctx) ev.copy(code = s""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { try { ${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L; } catch (java.text.ParseException e) { ${ev.isNull} = true; } - """)) + }""") } case StringType => val sdf = classOf[SimpleDateFormat].getName @@ -532,22 +531,22 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { case TimestampType => val eval1 = left.genCode(ctx) ev.copy(code = s""" - ${eval1.code} - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { ${ev.value} = ${eval1.value} / 1000000L; - """), isNull = eval1.isNull) + }""") case DateType => val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val eval1 = left.genCode(ctx) ev.copy(code = s""" ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(left.nullable, eval1.isNull)(s""" + if (!${ev.isNull}) { ${ev.value} = $dtu.daysToMillis(${eval1.value}) / 1000L; - """), isNull = eval1.isNull) + }""") } } } @@ -621,18 +620,17 @@ case class FromUnixTime(sec: Expression, format: Expression) val formatterName = ctx.addReferenceObj("formatter", formatter, sdf) val t = left.genCode(ctx) ev.copy(code = s""" - ${t.code} - boolean ${ev.isNull} = ${t.isNull}; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(left.nullable, t.isNull)(s""" + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { try { ${ev.value} = UTF8String.fromString($formatterName.format( new java.util.Date(${t.value} * 1000L))); } catch (java.lang.IllegalArgumentException e) { ${ev.isNull} = true; } - """)) + }""") } } else { nullSafeCodeGen(ctx, ev, (seconds, f) => { @@ -817,12 +815,13 @@ case class FromUTCTimestamp(left: Expression, right: Expression) ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" - ${eval.code} - long ${ev.value} = 0; - """ + - ctx.nullSafeExec(left.nullable, eval.isNull)(s""" - ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm); - """), isNull = eval.isNull) + |${eval.code} + |boolean ${ev.isNull} = ${eval.isNull}; + |long ${ev.value} = 0; + |if (!${ev.isNull}) { + | ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm); + |} + """.stripMargin) } } else { defineCodeGen(ctx, ev, (timestamp, format) => { @@ -975,12 +974,13 @@ case class ToUTCTimestamp(left: Expression, right: Expression) ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" - ${eval.code} - long ${ev.value} = 0; - """ + - ctx.nullSafeExec(left.nullable, eval.isNull)(s""" - ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm); - """), isNull = eval.isNull) + |${eval.code} + |boolean ${ev.isNull} = ${eval.isNull}; + |long ${ev.value} = 0; + |if (!${ev.isNull}) { + | ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm); + |} + """.stripMargin) } } else { defineCodeGen(ctx, ev, (timestamp, format) => { @@ -1075,11 +1075,11 @@ case class TruncDate(date: Expression, format: Expression) val d = date.genCode(ctx) ev.copy(code = s""" ${d.code} + boolean ${ev.isNull} = ${d.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(date.nullable, d.isNull)(s""" + if (!${ev.isNull}) { ${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); - """), isNull = d.isNull) + }""") } } else { nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 8f86fe2285e21..07bd862102576 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -665,8 +665,7 @@ case class ExternalMapToCatalyst private( s""" ${inputMap.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(child.nullable, inputMap.isNull)(s""" + if (!${inputMap.isNull}) { final int $length = ${inputMap.value}.size(); final Object[] $convertedKeys = new Object[$length]; final Object[] $convertedValues = new Object[$length]; @@ -684,15 +683,18 @@ case class ExternalMapToCatalyst private( } ${genValueConverter.code} - """ + - ctx.nullSafeExec(valueConverter.nullable, genValueConverter.isNull)(s""" - $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; - """) + s""" + if (${genValueConverter.isNull}) { + $convertedValues[$index] = null; + } else { + $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; + } + $index++; } ${ev.value} = new $mapCls(new $arrayCls($convertedKeys), new $arrayCls($convertedValues)); - """) + } + """ ev.copy(code = code, isNull = inputMap.isNull) } } @@ -720,10 +722,13 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType) val childrenCodes = children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + - ctx.nullSafeExec(e.nullable, eval.isNull)(s""" - $values[$i] = ${eval.value}; - """) + eval.code + s""" + if (${eval.isNull}) { + $values[$i] = null; + } else { + $values[$i] = ${eval.value}; + } + """ } val childrenCode = ctx.splitExpressions(ctx.INPUT_ROW, childrenCodes) @@ -765,10 +770,10 @@ case class EncodeUsingSerializer(child: Expression, kryo: Boolean) val serializerInit = s""" if ($env == null) { $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ + } else { + $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + } + """ ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) // Code to serialize. @@ -778,11 +783,8 @@ case class EncodeUsingSerializer(child: Expression, kryo: Boolean) val code = s""" ${input.code} - final $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; - """ + - ctx.nullSafeExec(child.nullable, input.isNull)(s""" - ${ev.value} = $serialize; - """) + final $javaType ${ev.value} = ${input.isNull} ? ${ctx.defaultValue(javaType)} : $serialize; + """ ev.copy(code = code, isNull = input.isNull) } @@ -814,10 +816,10 @@ case class DecodeUsingSerializer[T](child: Expression, tag: ClassTag[T], kryo: B val serializerInit = s""" if ($env == null) { $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ + } else { + $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + } + """ ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) // Code to deserialize. @@ -828,11 +830,8 @@ case class DecodeUsingSerializer[T](child: Expression, tag: ClassTag[T], kryo: B val code = s""" ${input.code} - final $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; - """ + - ctx.nullSafeExec(child.nullable, input.isNull)(s""" - ${ev.value} = $deserialize; - """) + final $javaType ${ev.value} = ${input.isNull} ? ${ctx.defaultValue(javaType)} : $deserialize; + """ ev.copy(code = code, isNull = input.isNull) } @@ -864,11 +863,12 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp """ } - val code = - instanceGen.code + - ctx.nullSafeExec(beanInstance.nullable, instanceGen.isNull)(s""" - ${initialize.mkString("\n")} - """) + val code = s""" + ${instanceGen.code} + if (!${instanceGen.isNull}) { + ${initialize.mkString("\n")} + } + """ ev.copy(code = code, isNull = instanceGen.isNull, value = instanceGen.value) } } @@ -1001,14 +1001,15 @@ case class ValidateExternalType(child: Expression, expected: DataType) val code = s""" ${input.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(child.nullable, input.isNull)(s""" + if (!${input.isNull}) { if ($typeCheck) { ${ev.value} = (${ctx.boxedType(dataType)}) $obj; } else { throw new RuntimeException($obj.getClass().getName() + $errMsgField); } - """) + } + + """ ev.copy(code = code, isNull = input.isNull) } } From cad3d746b4b9d1eb3ed568b53371d5d1a9285db5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 17:15:58 +0900 Subject: [PATCH 08/10] Revert "Use `ctx.nullSafeExec()` to generate nullability checking in `MapObjects`." This reverts commit deffccdf2ef314acf3de94c4fbd33655e65e24e2. --- .../expressions/objects/objects.scala | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 07bd862102576..ff5646f84b5b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -513,11 +513,23 @@ case class MapObjects private( case _ => s"$loopIsNull = $loopValue == null;" } + val setValue = if (lambdaFunction.nullable) { + s""" + if (${genFunction.isNull}) { + $convertedArray[$loopIndex] = null; + } else { + $convertedArray[$loopIndex] = $genFunctionValue; + } + """ + } else { + s"$convertedArray[$loopIndex] = $genFunctionValue;" + } + val code = s""" ${genInputData.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - """ + - ctx.nullSafeExec(inputData.nullable, genInputData.isNull)(s""" + + if (!${genInputData.isNull}) { $determineCollectionType $convertedType[] $convertedArray = null; int $dataLength = $getLength; @@ -529,15 +541,14 @@ case class MapObjects private( $loopNullCheck ${genFunction.code} - """ + - ctx.nullSafeExec(lambdaFunction.nullable, genFunction.isNull)(s""" - $convertedArray[$loopIndex] = $genFunctionValue; - """) + s""" + $setValue + $loopIndex += 1; } ${ev.value} = new ${classOf[GenericArrayData].getName}($convertedArray); - """) + } + """ ev.copy(code = code, isNull = genInputData.isNull) } } From 78d9e801334590789d5ab98518eb2a884d3e41d5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 17:45:39 +0900 Subject: [PATCH 09/10] Modify generated codes to be optimized by Janino. --- .../expressions/objects/objects.scala | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index cfb6aafaf3b66..a4cbef6b83a35 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -552,18 +552,6 @@ case class MapObjects private( case _ => s"$loopIsNull = $loopValue == null;" } - val setValue = if (lambdaFunction.nullable) { - s""" - if (${genFunction.isNull}) { - $convertedArray[$loopIndex] = null; - } else { - $convertedArray[$loopIndex] = $genFunctionValue; - } - """ - } else { - s"$convertedArray[$loopIndex] = $genFunctionValue;" - } - val code = s""" ${genInputData.code} ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; @@ -580,7 +568,8 @@ case class MapObjects private( $loopNullCheck ${genFunction.code} - $setValue + $convertedArray[$loopIndex] = + ${genFunction.isNull} ? null : ($convertedType) $genFunctionValue; $loopIndex += 1; } @@ -733,11 +722,8 @@ case class ExternalMapToCatalyst private( } ${genValueConverter.code} - if (${genValueConverter.isNull}) { - $convertedValues[$index] = null; - } else { - $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; - } + $convertedValues[$index] = + ${genValueConverter.isNull} ? null : ($convertedValueType) ${genValueConverter.value}; $index++; } @@ -773,12 +759,8 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType) val childrenCodes = children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - } - """ + $values[$i] = ${eval.isNull} ? null : (Object) ${eval.value}; + """ } val childrenCode = ctx.splitExpressions(ctx.INPUT_ROW, childrenCodes) From e777fbc778835307f103b6d34d9ec2ca09bae8cc Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 19:07:22 +0900 Subject: [PATCH 10/10] Revert replacements. --- .../expressions/objects/objects.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index a4cbef6b83a35..5c27179ec3b46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -568,8 +568,11 @@ case class MapObjects private( $loopNullCheck ${genFunction.code} - $convertedArray[$loopIndex] = - ${genFunction.isNull} ? null : ($convertedType) $genFunctionValue; + if (${genFunction.isNull}) { + $convertedArray[$loopIndex] = null; + } else { + $convertedArray[$loopIndex] = $genFunctionValue; + } $loopIndex += 1; } @@ -722,8 +725,11 @@ case class ExternalMapToCatalyst private( } ${genValueConverter.code} - $convertedValues[$index] = - ${genValueConverter.isNull} ? null : ($convertedValueType) ${genValueConverter.value}; + if (${genValueConverter.isNull}) { + $convertedValues[$index] = null; + } else { + $convertedValues[$index] = ($convertedValueType) ${genValueConverter.value}; + } $index++; } @@ -759,8 +765,12 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType) val childrenCodes = children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) eval.code + s""" - $values[$i] = ${eval.isNull} ? null : (Object) ${eval.value}; - """ + if (${eval.isNull}) { + $values[$i] = null; + } else { + $values[$i] = ${eval.value}; + } + """ } val childrenCode = ctx.splitExpressions(ctx.INPUT_ROW, childrenCodes)