From dbf3ab56dc9c97b90828bdf8bdba5870830ac79a Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 02:00:16 -0700 Subject: [PATCH 1/7] add PI and E --- .../spark/sql/catalyst/expressions/math.scala | 39 +++++++++++++++++++ .../expressions/MathFunctionsSuite.scala | 26 +++++++++++++ 2 files changed, 65 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala index 7dacb6a9b47b..7be01f589821 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala @@ -20,9 +20,38 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.types.{DataType, DoubleType} +/** + * A leaf expression specifically for math constants. Math constants expect no input. + * @param c The math constant. + * @param name The short name of the function + */ +abstract class LeafMathExpression(c: Double, name: String) + extends LeafExpression with Serializable { + self: Product => + + override def dataType: DataType = DoubleType + override def foldable: Boolean = true + override def nullable: Boolean = false + override def toString: String = s"$name()" + + override def eval(input: Row): Any = c + + // name of constant in java.lang.Math + def constName: String = name + + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + s""" + boolean ${ev.isNull} = false; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + ${ev.primitive} = java.lang.Math.$constName; + """ + } +} + /** * A unary expression specifically for math functions. Math Functions expect a specific type of * input format, therefore these functions extend `ExpectsInputTypes`. + * @param f The math function. * @param name The short name of the function */ abstract class UnaryMathExpression(f: Double => Double, name: String) @@ -98,6 +127,16 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) } } +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Leaf math functions +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +case class E() extends LeafMathExpression(math.E, "E") + +case class Pi() extends LeafMathExpression(math.Pi, "PI") + //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// // Unary math functions diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala index 25ebc70d095d..042c8efeed41 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala @@ -22,6 +22,24 @@ import org.apache.spark.sql.types.DoubleType class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { + /** + * Used for testing leaf math expressions. + * + * @param e expression + * @param c The constants in scala.math + * @param domain The set of values to run the function with + * @tparam T Generic type for primitives + */ + private def testLeaf[T]( + e: () => Expression, + c: T, + domain: Iterable[T] = (-20 to 20).map(_ * 0.1)): Unit = { + domain.foreach { value => + checkEvaluation(e(), c, EmptyRow) + } + checkEvaluation(e(), c, create_row(null)) + } + /** * Used for testing unary math expressions. * @@ -74,6 +92,14 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(c(Literal(1.0), Literal.create(null, DoubleType)), null, create_row(null)) } + test("e") { + testLeaf(E, math.E) + } + + test("pi") { + testLeaf(Pi, math.Pi) + } + test("sin") { testUnary(Sin, math.sin) } From 82d426ec7f42265b1b7ba6d619f145b78be62456 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 03:08:16 -0700 Subject: [PATCH 2/7] add function entry --- .../scala/org/apache/spark/sql/functions.scala | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 454af47913bf..de68a79f4fda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -944,6 +944,14 @@ object functions { */ def cosh(columnName: String): Column = cosh(Column(columnName)) + /** + * Returns the value of the e. + * + * @group math_funcs + * @since 1.5.0 + */ + def e(): Column = E() + /** * Computes the exponential of the given value. * @@ -1105,6 +1113,14 @@ object functions { */ def log1p(columnName: String): Column = log1p(Column(columnName)) + /** + * Returns the value of the pi. + * + * @group math_funcs + * @since 1.5.0 + */ + def pi(): Column = Pi() + /** * Returns the value of the first argument raised to the power of the second argument. * From e6783ef281922dff8c016a5b032e78892a58e45d Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 03:56:42 -0700 Subject: [PATCH 3/7] register function --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 406f6fad8413..13022f4a8d86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -106,6 +106,7 @@ object FunctionRegistry { expression[Cbrt]("cbrt"), expression[Ceil]("ceil"), expression[Cos]("cos"), + expression[E]("e"), expression[Exp]("exp"), expression[Expm1]("expm1"), expression[Floor]("floor"), @@ -113,6 +114,7 @@ object FunctionRegistry { expression[Log]("log"), expression[Log10]("log10"), expression[Log1p]("log1p"), + expression[Pi]("pi"), expression[Pow]("pow"), expression[Rint]("rint"), expression[Signum]("signum"), From 599ddd8aad3971f598552c9359580d1920593bdb Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 21:55:19 -0700 Subject: [PATCH 4/7] add py --- python/pyspark/sql/functions.py | 25 +++++++++++++++++++ .../spark/sql/catalyst/expressions/math.scala | 6 +---- .../expressions/MathFunctionsSuite.scala | 8 ++---- .../org/apache/spark/sql/functions.scala | 6 +++-- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index bbf465aca8d4..fd6d04930989 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -138,6 +138,13 @@ def _(): 'bitwiseNOT': 'Computes bitwise not.', } +_functions_1_5 = { + 'e': 'Returns the double value that is closer than any other to e, the base of the natural ' + + 'logarithms.', + 'pi': 'Returns the double value that is closer than any other to pi, the ratio of the ' + + 'circumference of a circle to its diameter.' +} + # math functions that take two arguments as input _binary_mathfunctions = { 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + @@ -188,11 +195,14 @@ def _(): globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) for _name, _doc in _window_functions.items(): globals()[_name] = since(1.4)(_create_window_function(_name, _doc)) +for _name, _doc in _functions_1_5.items(): + globals()[_name] = since(1.5)(_create_function(_name, _doc)) del _name, _doc __all__ += _functions.keys() __all__ += _functions_1_4.keys() __all__ += _binary_mathfunctions.keys() __all__ += _window_functions.keys() +__all__ += _functions_1_5.keys() __all__.sort() @@ -347,6 +357,21 @@ def randn(seed=None): jc = sc._jvm.functions.randn() return Column(jc) +@since(1.5) +def e(): + """Returns the double value that is closer than any other to e, the base of the natural + logarithms. + """ + import math + return Column(math.e) + +@since(1.5) +def pi(): + """Returns the double value that is closer than any other to pi, the ratio of the circumference + of a circle to its diameter. + """ + import math + return Column(math.pi) @since(1.4) def sparkPartitionId(): diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala index 7be01f589821..d9cc830b7ce2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala @@ -36,14 +36,10 @@ abstract class LeafMathExpression(c: Double, name: String) override def eval(input: Row): Any = c - // name of constant in java.lang.Math - def constName: String = name - override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { s""" boolean ${ev.isNull} = false; - ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; - ${ev.primitive} = java.lang.Math.$constName; + ${ctx.javaType(dataType)} ${ev.primitive} = java.lang.Math.$name; """ } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala index 042c8efeed41..43fe2e05cbc1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala @@ -27,16 +27,12 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { * * @param e expression * @param c The constants in scala.math - * @param domain The set of values to run the function with * @tparam T Generic type for primitives */ private def testLeaf[T]( e: () => Expression, - c: T, - domain: Iterable[T] = (-20 to 20).map(_ * 0.1)): Unit = { - domain.foreach { value => - checkEvaluation(e(), c, EmptyRow) - } + c: T): Unit = { + checkEvaluation(e(), c, EmptyRow) checkEvaluation(e(), c, create_row(null)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index de68a79f4fda..31d14564b57e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -945,7 +945,8 @@ object functions { def cosh(columnName: String): Column = cosh(Column(columnName)) /** - * Returns the value of the e. + * Returns the double value that is closer than any other to e, the base of the natural + * logarithms. * * @group math_funcs * @since 1.5.0 @@ -1114,7 +1115,8 @@ object functions { def log1p(columnName: String): Column = log1p(Column(columnName)) /** - * Returns the value of the pi. + * Returns the double value that is closer than any other to pi, the ratio of the circumference + * of a circle to its diameter. * * @group math_funcs * @since 1.5.0 From db331c964a9b9950447c20b89c6bbce40f2c7a2b Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 22:16:44 -0700 Subject: [PATCH 5/7] py style --- python/pyspark/sql/functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fd6d04930989..0aa98bf5cac0 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -357,6 +357,7 @@ def randn(seed=None): jc = sc._jvm.functions.randn() return Column(jc) + @since(1.5) def e(): """Returns the double value that is closer than any other to e, the base of the natural @@ -365,6 +366,7 @@ def e(): import math return Column(math.e) + @since(1.5) def pi(): """Returns the double value that is closer than any other to pi, the ratio of the circumference @@ -373,6 +375,7 @@ def pi(): import math return Column(math.pi) + @since(1.4) def sparkPartitionId(): """A column for partition ID of the Spark task. From 11b351c2a0fc9910a03d15befa09b41918b83396 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 9 Jun 2015 23:42:10 -0700 Subject: [PATCH 6/7] add tests and remove pu --- python/pyspark/sql/functions.py | 28 ------------------- .../catalyst/analysis/FunctionRegistry.scala | 2 +- .../spark/sql/catalyst/expressions/math.scala | 2 +- .../expressions/MathFunctionsSuite.scala | 2 +- .../org/apache/spark/sql/functions.scala | 2 +- .../spark/sql/DataFrameFunctionsSuite.scala | 11 ++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 11 ++++++++ 7 files changed, 26 insertions(+), 32 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 0aa98bf5cac0..bbf465aca8d4 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -138,13 +138,6 @@ def _(): 'bitwiseNOT': 'Computes bitwise not.', } -_functions_1_5 = { - 'e': 'Returns the double value that is closer than any other to e, the base of the natural ' + - 'logarithms.', - 'pi': 'Returns the double value that is closer than any other to pi, the ratio of the ' + - 'circumference of a circle to its diameter.' -} - # math functions that take two arguments as input _binary_mathfunctions = { 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + @@ -195,14 +188,11 @@ def _(): globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) for _name, _doc in _window_functions.items(): globals()[_name] = since(1.4)(_create_window_function(_name, _doc)) -for _name, _doc in _functions_1_5.items(): - globals()[_name] = since(1.5)(_create_function(_name, _doc)) del _name, _doc __all__ += _functions.keys() __all__ += _functions_1_4.keys() __all__ += _binary_mathfunctions.keys() __all__ += _window_functions.keys() -__all__ += _functions_1_5.keys() __all__.sort() @@ -358,24 +348,6 @@ def randn(seed=None): return Column(jc) -@since(1.5) -def e(): - """Returns the double value that is closer than any other to e, the base of the natural - logarithms. - """ - import math - return Column(math.e) - - -@since(1.5) -def pi(): - """Returns the double value that is closer than any other to pi, the ratio of the circumference - of a circle to its diameter. - """ - import math - return Column(math.pi) - - @since(1.4) def sparkPartitionId(): """A column for partition ID of the Spark task. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 13022f4a8d86..ce5ee5e6e160 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -106,7 +106,7 @@ object FunctionRegistry { expression[Cbrt]("cbrt"), expression[Ceil]("ceil"), expression[Cos]("cos"), - expression[E]("e"), + expression[EulerNumber]("e"), expression[Exp]("exp"), expression[Expm1]("expm1"), expression[Floor]("floor"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala index d9cc830b7ce2..e1d8c9a0cdb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala @@ -129,7 +129,7 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -case class E() extends LeafMathExpression(math.E, "E") +case class EulerNumber() extends LeafMathExpression(math.E, "E") case class Pi() extends LeafMathExpression(math.Pi, "PI") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala index 43fe2e05cbc1..1fe69059d39d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala @@ -89,7 +89,7 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("e") { - testLeaf(E, math.E) + testLeaf(EulerNumber, math.E) } test("pi") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 31d14564b57e..b3fc1e6cd987 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -951,7 +951,7 @@ object functions { * @group math_funcs * @since 1.5.0 */ - def e(): Column = E() + def e(): Column = EulerNumber() /** * Computes the exponential of the given value. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 53c2befb7370..754e83483560 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -85,6 +85,17 @@ class DataFrameFunctionsSuite extends QueryTest { } } + test("constant function") { + checkAnswer( + testData2.select(e()).limit(1), + Row(scala.math.E) + ) + checkAnswer( + testData2.select(pi()).limit(1), + Row(scala.math.Pi) + ) + } + test("bitwiseNOT") { checkAnswer( testData2.select(bitwiseNOT($"a")), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 5babc4332cc7..f50a74c327c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -145,6 +145,17 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils { Seq(Row("1"), Row("2"))) } + test("constant functions") { + checkAnswer( + sql("SELECT E()"), + Row(scala.math.E) + ) + checkAnswer( + sql("SELECT PI()"), + Row(scala.math.Pi) + ) + } + test("SPARK-3176 Added Parser of SQL ABS()") { checkAnswer( sql("SELECT ABS(-1.3)"), From e2e8dbdf93e83a0ce33dd8e0b0e3bc223fdc3ec9 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Wed, 10 Jun 2015 00:11:27 -0700 Subject: [PATCH 7/7] move tests --- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 10 +++++++++- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 ----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 754e83483560..b93ad39f5da4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -85,7 +85,7 @@ class DataFrameFunctionsSuite extends QueryTest { } } - test("constant function") { + test("constant functions") { checkAnswer( testData2.select(e()).limit(1), Row(scala.math.E) @@ -94,6 +94,14 @@ class DataFrameFunctionsSuite extends QueryTest { testData2.select(pi()).limit(1), Row(scala.math.Pi) ) + checkAnswer( + ctx.sql("SELECT E()"), + Row(scala.math.E) + ) + checkAnswer( + ctx.sql("SELECT PI()"), + Row(scala.math.Pi) + ) } test("bitwiseNOT") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f50a74c327c1..5babc4332cc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -145,17 +145,6 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils { Seq(Row("1"), Row("2"))) } - test("constant functions") { - checkAnswer( - sql("SELECT E()"), - Row(scala.math.E) - ) - checkAnswer( - sql("SELECT PI()"), - Row(scala.math.Pi) - ) - } - test("SPARK-3176 Added Parser of SQL ABS()") { checkAnswer( sql("SELECT ABS(-1.3)"),