From 721651119caf695689581a8f0eed83f5313a4110 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 28 Apr 2020 19:02:39 +0900 Subject: [PATCH 1/4] Fix --- .../spark/sql/catalyst/expressions/randomExpressions.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index cc05828cfcccb..36a9dcfb98ff4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -102,6 +102,8 @@ case class Rand(child: Expression) extends RDG with ExpressionWithRandomSeed { } override def freshCopy(): Rand = Rand(child) + + override def sql: String = "rand()" } object Rand { @@ -145,6 +147,8 @@ case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed { } override def freshCopy(): Randn = Randn(child) + + override def sql: String = "randn()" } object Randn { From 663fdc7266bd3ec6353db84d46cc80d2d1d7decf Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 29 Apr 2020 01:31:47 +0900 Subject: [PATCH 2/4] Fix --- .../expressions/randomExpressions.scala | 24 ++++++++++++++----- .../catalyst/expressions/RandomSuite.scala | 7 ++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 22 +++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 36a9dcfb98ff4..fddde7c5ee6f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -83,9 +83,12 @@ trait ExpressionWithRandomSeed { """, since = "1.5.0") // scalastyle:on line.size.limit -case class Rand(child: Expression) extends RDG with ExpressionWithRandomSeed { +case class Rand(child: Expression, useRandSeed: Boolean = false) + extends RDG with ExpressionWithRandomSeed { - def this() = this(Literal(Utils.random.nextLong(), LongType)) + def this() = this(Literal(Utils.random.nextLong(), LongType), true) + + def this(child: Expression) = this(child, false) override def withNewSeed(seed: Long): Rand = Rand(Literal(seed, LongType)) @@ -103,7 +106,10 @@ case class Rand(child: Expression) extends RDG with ExpressionWithRandomSeed { override def freshCopy(): Rand = Rand(child) - override def sql: String = "rand()" + override def flatArguments: Iterator[Any] = Iterator(child) + override def sql: String = { + s"rand(${if (useRandSeed) "" else child.sql})" + } } object Rand { @@ -128,9 +134,12 @@ object Rand { """, since = "1.5.0") // scalastyle:on line.size.limit -case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed { +case class Randn(child: Expression, useRandSeed: Boolean = false) + extends RDG with ExpressionWithRandomSeed { - def this() = this(Literal(Utils.random.nextLong(), LongType)) + def this() = this(Literal(Utils.random.nextLong(), LongType), true) + + def this(child: Expression) = this(child, false) override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType)) @@ -148,7 +157,10 @@ case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed { override def freshCopy(): Randn = Randn(child) - override def sql: String = "randn()" + override def flatArguments: Iterator[Any] = Iterator(child) + override def sql: String = { + s"randn(${if (useRandSeed) "" else child.sql})" + } } object Randn { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala index 469c24b3b5f49..f067ca84699a9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala @@ -34,4 +34,11 @@ class RandomSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Rand(5419823303878592871L), 0.7145363364564755) checkEvaluation(Randn(5419823303878592871L), 0.7816815274533012) } + + test("Do not display the seed of rand/randn with no argument in output schema") { + assert(Rand(Literal(1L), true).sql === "rand()") + assert(Randn(Literal(1L), true).sql === "randn()") + assert(Rand(Literal(1L), false).sql === "rand(1L)") + assert(Randn(Literal(1L), false).sql === "randn(1L)") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index a958ab8064ba9..d9472e745b0ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3425,6 +3425,28 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(SQLConf.get.getConf(SQLConf.CODEGEN_FALLBACK) === true) } } + + test("Do not display the seed of rand/randn with no argument in output schema") { + def checkIfSeedExistsInExplain(df: DataFrame): Unit = { + val output = new java.io.ByteArrayOutputStream() + Console.withOut(output) { + df.explain() + } + output.toString.matches("""randn?\(-?[0-9]+\)""") + } + val df1 = sql("SELECT rand()") + assert(df1.schema.head.name === "rand()") + checkIfSeedExistsInExplain(df1) + val df2 = sql("SELECT rand(1L)") + assert(df2.schema.head.name === "rand(1)") + checkIfSeedExistsInExplain(df2) + val df3 = sql("SELECT randn()") + assert(df3.schema.head.name === "randn()") + checkIfSeedExistsInExplain(df1) + val df4 = sql("SELECT randn(1L)") + assert(df4.schema.head.name === "randn(1)") + checkIfSeedExistsInExplain(df2) + } } case class Foo(bar: Option[String]) From 4b1f3f2a34ddac22d940aac02a592b12a27ccb60 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 29 Apr 2020 09:03:44 +0900 Subject: [PATCH 3/4] Fix --- .../sql/catalyst/expressions/randomExpressions.scala | 12 ++++++------ .../spark/sql/catalyst/expressions/RandomSuite.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index fddde7c5ee6f8..6a945173803b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -83,7 +83,7 @@ trait ExpressionWithRandomSeed { """, since = "1.5.0") // scalastyle:on line.size.limit -case class Rand(child: Expression, useRandSeed: Boolean = false) +case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { def this() = this(Literal(Utils.random.nextLong(), LongType), true) @@ -104,11 +104,11 @@ case class Rand(child: Expression, useRandSeed: Boolean = false) isNull = FalseLiteral) } - override def freshCopy(): Rand = Rand(child) + override def freshCopy(): Rand = Rand(child, hideSeed) override def flatArguments: Iterator[Any] = Iterator(child) override def sql: String = { - s"rand(${if (useRandSeed) "" else child.sql})" + s"rand(${if (hideSeed) "" else child.sql})" } } @@ -134,7 +134,7 @@ object Rand { """, since = "1.5.0") // scalastyle:on line.size.limit -case class Randn(child: Expression, useRandSeed: Boolean = false) +case class Randn(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { def this() = this(Literal(Utils.random.nextLong(), LongType), true) @@ -155,11 +155,11 @@ case class Randn(child: Expression, useRandSeed: Boolean = false) isNull = FalseLiteral) } - override def freshCopy(): Randn = Randn(child) + override def freshCopy(): Randn = Randn(child, hideSeed) override def flatArguments: Iterator[Any] = Iterator(child) override def sql: String = { - s"randn(${if (useRandSeed) "" else child.sql})" + s"randn(${if (hideSeed) "" else child.sql})" } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala index f067ca84699a9..2aa53f581555f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala @@ -35,7 +35,7 @@ class RandomSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Randn(5419823303878592871L), 0.7816815274533012) } - test("Do not display the seed of rand/randn with no argument in output schema") { + test("SPARK-31594: Do not display the seed of rand/randn with no argument in output schema") { assert(Rand(Literal(1L), true).sql === "rand()") assert(Randn(Literal(1L), true).sql === "randn()") assert(Rand(Literal(1L), false).sql === "rand(1L)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index d9472e745b0ef..100f424ad2d4f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3426,7 +3426,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("Do not display the seed of rand/randn with no argument in output schema") { + test("SPARK-31594: Do not display the seed of rand/randn with no argument in output schema") { def checkIfSeedExistsInExplain(df: DataFrame): Unit = { val output = new java.io.ByteArrayOutputStream() Console.withOut(output) { From d90b2e726135e91eadf10c2972199c3f45887b5c Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 29 Apr 2020 15:30:41 +0900 Subject: [PATCH 4/4] Fix --- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 100f424ad2d4f..d5247fba00283 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3432,7 +3432,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Console.withOut(output) { df.explain() } - output.toString.matches("""randn?\(-?[0-9]+\)""") + val projectExplainOutput = output.toString.split("\n").find(_.contains("Project")).get + assert(projectExplainOutput.matches(""".*randn?\(-?[0-9]+\).*""")) } val df1 = sql("SELECT rand()") assert(df1.schema.head.name === "rand()")