From b3df91b01779d3f3af1345af945914941d314163 Mon Sep 17 00:00:00 2001 From: xinyunh Date: Thu, 14 Aug 2014 13:13:02 -0700 Subject: [PATCH 1/6] add 'Last' component --- .../apache/spark/sql/catalyst/SqlParser.scala | 2 ++ .../spark/sql/catalyst/dsl/package.scala | 1 + .../sql/catalyst/expressions/aggregates.scala | 29 +++++++++++++++++++ 3 files changed, 32 insertions(+) mode change 100644 => 100755 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala mode change 100644 => 100755 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala mode change 100644 => 100755 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala old mode 100644 new mode 100755 index a88bd859fc85e..8864780456c76 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -81,6 +81,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val DISTINCT = Keyword("DISTINCT") protected val FALSE = Keyword("FALSE") protected val FIRST = Keyword("FIRST") + protected val LAST = Keyword("LAST") protected val FROM = Keyword("FROM") protected val FULL = Keyword("FULL") protected val GROUP = Keyword("GROUP") @@ -311,6 +312,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { case s ~ _ ~ _ ~ _ ~ _ ~ e => ApproxCountDistinct(e, s.toDouble) } | FIRST ~> "(" ~> expression <~ ")" ^^ { case exp => First(exp) } | + LAST ~> "(" ~> expression <~ ")" ^^ { case exp => Last(exp) } | AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } | MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } | MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } | diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala old mode 100644 new mode 100755 index f44521d6381c9..deb622c39faf5 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -132,6 +132,7 @@ package object dsl { def approxCountDistinct(e: Expression, rsd: Double = 0.05) = ApproxCountDistinct(e, rsd) def avg(e: Expression) = Average(e) def first(e: Expression) = First(e) + def last(e: Expression) = Last(e) def min(e: Expression) = Min(e) def max(e: Expression) = Max(e) def upper(e: Expression) = Upper(e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala old mode 100644 new mode 100755 index 15560a2a933ad..47338a2e92eac --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -344,6 +344,21 @@ case class First(child: Expression) extends PartialAggregate with trees.UnaryNod override def newInstance() = new FirstFunction(child, this) } +case class Last(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { + override def references = child.references + override def nullable = true + override def dataType = child.dataType + override def toString = s"LAST($child)" + + override def asPartial: SplitEvaluation = { + val partialLast = Alias(Last(child), "PartialLast")() + SplitEvaluation( + Last(partialLast.toAttribute), + partialLast :: Nil) + } + override def newInstance() = new LastFunction(child, this) +} + case class AverageFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { @@ -489,3 +504,17 @@ case class FirstFunction(expr: Expression, base: AggregateExpression) extends Ag override def eval(input: Row): Any = result } + +case class LastFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { + def this() = this(null, null) // Required for serialization. + + var result: Any = null + + override def update(input: Row): Unit = { + if (result == null) { + result = input + } + } + + override def eval(input: Row): Any = if (result != null) expr.eval(result.asInstanceOf[Row]) else null +} From 7f6980a0186cde2a23a6f9537845bf108f4b0818 Mon Sep 17 00:00:00 2001 From: xinyunh Date: Thu, 14 Aug 2014 16:10:28 -0700 Subject: [PATCH 2/6] fix the bug in 'Last' component --- .../apache/spark/sql/catalyst/expressions/aggregates.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 47338a2e92eac..f519d96963e1c 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -511,9 +511,7 @@ case class LastFunction(expr: Expression, base: AggregateExpression) extends Agg var result: Any = null override def update(input: Row): Unit = { - if (result == null) { - result = input - } + result = input } override def eval(input: Row): Any = if (result != null) expr.eval(result.asInstanceOf[Row]) else null From ff8e51e815904c3c7f0e7dced4d54a9047cb6da7 Mon Sep 17 00:00:00 2001 From: bomeng Date: Wed, 20 Aug 2014 11:16:35 -0700 Subject: [PATCH 3/6] add abs() function support --- .../apache/spark/sql/catalyst/SqlParser.scala | 2 + .../sql/catalyst/expressions/arithmetic.scala | 50 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 8864780456c76..0eff1614c3c95 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -125,6 +125,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val SUBSTR = Keyword("SUBSTR") protected val SUBSTRING = Keyword("SUBSTRING") protected val SQRT = Keyword("SQRT") + protected val ABS = Keyword("ABS") // Use reflection to find the reserved words defined in this class. protected val reservedWords = @@ -328,6 +329,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l) } | SQRT ~> "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) } | + ABS ~> "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) } | ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ { case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 56f042891a2e6..b9dc85085ea94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.types._ +import scala.math.pow case class UnaryMinus(child: Expression) extends UnaryExpression { type EvaluatedType = Any @@ -129,3 +130,52 @@ case class MaxOf(left: Expression, right: Expression) extends Expression { override def toString = s"MaxOf($left, $right)" } + +/** + * A function that get the absolute value of the numeric value. + */ +case class Abs(child: Expression) extends UnaryExpression { + type EvaluatedType = Any + + def dataType = child.dataType + override def foldable = child.foldable + def nullable = child.nullable + override def toString = s"Abs($child)" + + override def eval(input: Row): Any = n1(child, input, _.abs(_)) +} + +/** + * A function that get the power value of two parameters. + * First one is taken as base while second one taken as exponent + */ +case class Power(base: Expression, exponent: Expression) extends Expression { + type EvaluatedType = Any + + def dataType: DataType = { + if (!resolved) throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved") + DoubleType + } + override def foldable = base.foldable && exponent.foldable + def nullable: Boolean = base.nullable || exponent.nullable + override def toString = s"Power($base, $exponent)" + + override def children = base :: exponent :: Nil + + override def eval(input: Row): Any = { + def convertToDouble(num: EvaluatedType): Double = { + num match { + case d:Double => d + case i:Integer => i.doubleValue() + case f:Float => f.toDouble + } + } + + val base_v = base.eval(input) + val exponent_v = exponent.eval(input) + + if ((base_v == null) || (exponent_v == null)) null + else pow(convertToDouble(base_v), convertToDouble(exponent_v)) + } + +} From 39f0309195eb162f8e276bf758600e70ba39ad49 Mon Sep 17 00:00:00 2001 From: bomeng Date: Wed, 20 Aug 2014 11:35:48 -0700 Subject: [PATCH 4/6] Modify the code of POWER and ABS. Move them to the file arithmetic --- .../apache/spark/sql/catalyst/SqlParser.scala | 5 ++ .../org/apache/spark/sql/SQLQuerySuite.scala | 56 ++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 0eff1614c3c95..c10445c0f691a 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -126,6 +126,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val SUBSTRING = Keyword("SUBSTRING") protected val SQRT = Keyword("SQRT") protected val ABS = Keyword("ABS") + protected val POW = Keyword("POW") + protected val POWER = Keyword("POWER") // Use reflection to find the reserved words defined in this class. protected val reservedWords = @@ -330,6 +332,9 @@ class SqlParser extends StandardTokenParsers with PackratParsers { } | SQRT ~> "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) } | ABS ~> "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) } | + (POW | POWER) ~> "(" ~> expression ~ "," ~ expression <~ ")" ^^ { + case s ~ "," ~ p => Power(s,p) + } | ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ { case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 1ac205937714c..4a6842c66ebd7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -41,6 +41,58 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } + test("SPARK-3176 Added Parser of SQL ABS()") { + checkAnswer( + sql("SELECT ABS(-1.3)"), + 1.3) + checkAnswer( + sql("SELECT ABS(0.0)"), + 0.0) + checkAnswer( + sql("SELECT ABS(2.5)"), + 2.5) + } + + test("SPARK-3176 Added Parser of SQL POWER()") { + checkAnswer( + sql("SELECT POWER(0, 512.0)"), + 0.0) + checkAnswer( + sql("SELECT POW(1.0, 256.0)"), + 1.0) + checkAnswer( + sql("SELECT POWER(1, -128)"), + 1.0) + checkAnswer( + sql("SELECT POW(-1.0, -63)"), + -1.0) + checkAnswer( + sql("SELECT POWER(-1, 32.0)"), + 1.0) + checkAnswer( + sql("SELECT POW(2, 8)"), + 256.0) + checkAnswer( + sql("SELECT POWER(0.5, 2)"), + 0.25) + checkAnswer( + sql("SELECT POW(2, -2)"), + 0.25) + checkAnswer( + sql("SELECT POWER(8, 1)"), + 8.0) + checkAnswer( + sql("SELECT POW(16, 0.5)"), + 4.0) + } + + test("SPARK-3176 Added Parser of SQL LAST()") { + checkAnswer( + sql("SELECT LAST(n) FROM lowerCaseData"), + 4) + } + + test("SPARK-2041 column name equals tablename") { checkAnswer( sql("SELECT tableName FROM tableName"), @@ -53,14 +105,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { (1 to 100).map(x => Row(math.sqrt(x.toDouble))).toSeq ) } - + test("SQRT with automatic string casts") { checkAnswer( sql("SELECT SQRT(CAST(key AS STRING)) FROM testData"), (1 to 100).map(x => Row(math.sqrt(x.toDouble))).toSeq ) } - + test("SPARK-2407 Added Parser of SQL SUBSTR()") { checkAnswer( sql("SELECT substr(tableName, 1, 2) FROM tableName"), From 88436434676509ee00884aae66fb087562951721 Mon Sep 17 00:00:00 2001 From: xinyunh Date: Tue, 2 Sep 2014 10:21:21 -0700 Subject: [PATCH 5/6] fix the code style issue --- .../apache/spark/sql/catalyst/expressions/aggregates.scala | 3 ++- .../apache/spark/sql/catalyst/expressions/arithmetic.scala | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index f519d96963e1c..1b4d892625dbb 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -514,5 +514,6 @@ case class LastFunction(expr: Expression, base: AggregateExpression) extends Agg result = input } - override def eval(input: Row): Any = if (result != null) expr.eval(result.asInstanceOf[Row]) else null + override def eval(input: Row): Any = if (result != null) expr.eval(result.asInstanceOf[Row]) + else null } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index b9dc85085ea94..38581f1367832 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -153,7 +153,9 @@ case class Power(base: Expression, exponent: Expression) extends Expression { type EvaluatedType = Any def dataType: DataType = { - if (!resolved) throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved") + if (!resolved) { + throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved") + } DoubleType } override def foldable = base.foldable && exponent.foldable From 71d15e7eb757e32e6fa0c47425905f7cd58d9bee Mon Sep 17 00:00:00 2001 From: xinyunh Date: Wed, 3 Sep 2014 11:23:51 -0700 Subject: [PATCH 6/6] remove POWER part --- .../apache/spark/sql/catalyst/SqlParser.scala | 5 --- .../sql/catalyst/expressions/arithmetic.scala | 37 ------------------- .../org/apache/spark/sql/SQLQuerySuite.scala | 33 ----------------- 3 files changed, 75 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index c10445c0f691a..0eff1614c3c95 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -126,8 +126,6 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val SUBSTRING = Keyword("SUBSTRING") protected val SQRT = Keyword("SQRT") protected val ABS = Keyword("ABS") - protected val POW = Keyword("POW") - protected val POWER = Keyword("POWER") // Use reflection to find the reserved words defined in this class. protected val reservedWords = @@ -332,9 +330,6 @@ class SqlParser extends StandardTokenParsers with PackratParsers { } | SQRT ~> "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) } | ABS ~> "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) } | - (POW | POWER) ~> "(" ~> expression ~ "," ~ expression <~ ")" ^^ { - case s ~ "," ~ p => Power(s,p) - } | ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ { case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 38581f1367832..7a10700d2887f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -144,40 +144,3 @@ case class Abs(child: Expression) extends UnaryExpression { override def eval(input: Row): Any = n1(child, input, _.abs(_)) } - -/** - * A function that get the power value of two parameters. - * First one is taken as base while second one taken as exponent - */ -case class Power(base: Expression, exponent: Expression) extends Expression { - type EvaluatedType = Any - - def dataType: DataType = { - if (!resolved) { - throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved") - } - DoubleType - } - override def foldable = base.foldable && exponent.foldable - def nullable: Boolean = base.nullable || exponent.nullable - override def toString = s"Power($base, $exponent)" - - override def children = base :: exponent :: Nil - - override def eval(input: Row): Any = { - def convertToDouble(num: EvaluatedType): Double = { - num match { - case d:Double => d - case i:Integer => i.doubleValue() - case f:Float => f.toDouble - } - } - - val base_v = base.eval(input) - val exponent_v = exponent.eval(input) - - if ((base_v == null) || (exponent_v == null)) null - else pow(convertToDouble(base_v), convertToDouble(exponent_v)) - } - -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4a6842c66ebd7..fb7322dbfdf38 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -53,39 +53,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { 2.5) } - test("SPARK-3176 Added Parser of SQL POWER()") { - checkAnswer( - sql("SELECT POWER(0, 512.0)"), - 0.0) - checkAnswer( - sql("SELECT POW(1.0, 256.0)"), - 1.0) - checkAnswer( - sql("SELECT POWER(1, -128)"), - 1.0) - checkAnswer( - sql("SELECT POW(-1.0, -63)"), - -1.0) - checkAnswer( - sql("SELECT POWER(-1, 32.0)"), - 1.0) - checkAnswer( - sql("SELECT POW(2, 8)"), - 256.0) - checkAnswer( - sql("SELECT POWER(0.5, 2)"), - 0.25) - checkAnswer( - sql("SELECT POW(2, -2)"), - 0.25) - checkAnswer( - sql("SELECT POWER(8, 1)"), - 8.0) - checkAnswer( - sql("SELECT POW(16, 0.5)"), - 4.0) - } - test("SPARK-3176 Added Parser of SQL LAST()") { checkAnswer( sql("SELECT LAST(n) FROM lowerCaseData"),