From 7d61642860125ff8049578507b6b1143eacad88b Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Wed, 14 Aug 2019 23:27:09 -0400 Subject: [PATCH 1/8] [SPARK-27931][SQL] Accept 'on' and 'off' as input and trim input for a boolean data type. --- .../apache/spark/sql/catalyst/util/StringUtils.scala | 11 +++++++---- .../spark/sql/catalyst/expressions/CastSuite.scala | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 6510bacf55899..5b927fc1e397e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -65,12 +65,15 @@ object StringUtils extends Logging { "(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines } - private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString) - private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString) + private[this] val trueStrings = + Set("t", "true", "y", "yes", "1", "on").map(UTF8String.fromString) + + private[this] val falseStrings = + Set("f", "false", "n", "no", "0", "off").map(UTF8String.fromString) // scalastyle:off caselocale - def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase) - def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase) + def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim()) + def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase.trim()) // scalastyle:on caselocale /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1f9fa22d30e13..e29b30e2b5fa3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -825,6 +825,8 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkCast("y", true) checkCast("yes", true) checkCast("1", true) + checkCast("on", true) + checkCast(" true ", true) checkCast("f", false) checkCast("false", false) @@ -832,6 +834,8 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkCast("n", false) checkCast("no", false) checkCast("0", false) + checkCast("off", false) + checkCast(" off ", false) checkEvaluation(cast("abc", BooleanType), null) checkEvaluation(cast("", BooleanType), null) From 933fe8680ddbd3a28676d431b4c83d3f3d82d3d6 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Thu, 15 Aug 2019 20:11:03 -0400 Subject: [PATCH 2/8] [SPARK-27931][SQL] Unique prefixes of strings are also accepted for the boolean data type. --- .../spark/sql/catalyst/util/StringUtils.scala | 4 ++-- .../sql/catalyst/expressions/CastSuite.scala | 23 +++++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 5b927fc1e397e..20a155ad06348 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -66,10 +66,10 @@ object StringUtils extends Logging { } private[this] val trueStrings = - Set("t", "true", "y", "yes", "1", "on").map(UTF8String.fromString) + Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString) private[this] val falseStrings = - Set("f", "false", "n", "no", "0", "off").map(UTF8String.fromString) + Set("false", "fals", "fal", "fa", "f", "no", "n", "off", "of", "0").map(UTF8String.fromString) // scalastyle:off caselocale def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim()) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index e29b30e2b5fa3..86db2d7c7f445 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -819,23 +819,32 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } test("cast string to boolean") { - checkCast("t", true) + checkCast("true", true) + checkCast("tru", true) + checkCast("tr", true) + checkCast("t", true) checkCast("tRUe", true) - checkCast("y", true) + checkCast(" tRue ", true) + checkCast(" tRu ", true) checkCast("yes", true) + checkCast("ye", true) + checkCast("y", true) checkCast("1", true) checkCast("on", true) - checkCast(" true ", true) - checkCast("f", false) checkCast("false", false) - checkCast("FAlsE", false) - checkCast("n", false) + checkCast("fals", false) + checkCast("fal", false) + checkCast("fa", false) + checkCast("f", false) + checkCast(" fAlse ", false) + checkCast(" fAls ", false) checkCast("no", false) + checkCast("n", false) checkCast("0", false) checkCast("off", false) - checkCast(" off ", false) + checkCast("of", false) checkEvaluation(cast("abc", BooleanType), null) checkEvaluation(cast("", BooleanType), null) From a5aec9ff78f5e065f8d1cebd460f9fd609286202 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Fri, 16 Aug 2019 09:17:27 -0400 Subject: [PATCH 3/8] Trim string whitespace so it should be true --- sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql index 4427d76f48d80..abc34ce8a5107 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql @@ -23,7 +23,7 @@ SELECT false AS `false`; SELECT boolean('t') AS true; -- [SPARK-27931] Trim the string when cast string type to boolean type -SELECT boolean(' f ') AS `false`; +SELECT boolean(' f ') AS `true`; SELECT boolean('true') AS true; From 9e9aac3269ed5956afc8a9e6a4491f9e6e747508 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Fri, 16 Aug 2019 14:10:17 -0400 Subject: [PATCH 4/8] Generated spark golden files. --- .../test/resources/sql-tests/inputs/pgSQL/boolean.sql | 2 +- .../resources/sql-tests/results/pgSQL/boolean.sql.out | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql index abc34ce8a5107..4427d76f48d80 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql @@ -23,7 +23,7 @@ SELECT false AS `false`; SELECT boolean('t') AS true; -- [SPARK-27931] Trim the string when cast string type to boolean type -SELECT boolean(' f ') AS `true`; +SELECT boolean(' f ') AS `false`; SELECT boolean('true') AS true; diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out index c7903c8a34ef4..203806d43368a 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out @@ -39,7 +39,7 @@ SELECT boolean(' f ') AS `false` -- !query 4 schema struct -- !query 4 output -NULL +false -- !query 5 @@ -127,7 +127,7 @@ SELECT boolean('on') AS true -- !query 15 schema struct -- !query 15 output -NULL +true -- !query 16 @@ -135,7 +135,7 @@ SELECT boolean('off') AS `false` -- !query 16 schema struct -- !query 16 output -NULL +false -- !query 17 @@ -143,7 +143,7 @@ SELECT boolean('of') AS `false` -- !query 17 schema struct -- !query 17 output -NULL +false -- !query 18 @@ -296,7 +296,7 @@ SELECT boolean(string(' true ')) AS true, -- !query 36 schema struct -- !query 36 output -NULL NULL +true false -- !query 37 From dacd46b3856060ba1792779815d3e938468abc5a Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Tue, 27 Aug 2019 15:43:08 -0400 Subject: [PATCH 5/8] add a comment: "true", "yes", "1", "false", "no", "0", and unique prefixes of these strings are accepted. --- .../scala/org/apache/spark/sql/catalyst/util/StringUtils.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 20a155ad06348..a14ae540f5056 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -65,6 +65,7 @@ object StringUtils extends Logging { "(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines } + // "true", "yes", "1", "false", "no", "0", and unique prefixes of these strings are accepted. private[this] val trueStrings = Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString) From abe9a8431f0b7f5cd403e54b31834aecce66c524 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Thu, 29 Aug 2019 21:49:57 -0400 Subject: [PATCH 6/8] [SPARK-27931][SQL] add a "FAlse" test case and remove the redundant space. --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 86db2d7c7f445..f15acb99b3b6a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -819,7 +819,6 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } test("cast string to boolean") { - checkCast("true", true) checkCast("tru", true) checkCast("tr", true) @@ -840,6 +839,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkCast("f", false) checkCast(" fAlse ", false) checkCast(" fAls ", false) + checkCast(" FAlsE ", false) checkCast("no", false) checkCast("n", false) checkCast("0", false) From 2ea551c728396881cfb05ed01f6179497bd3ceb5 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Thu, 29 Aug 2019 22:17:08 -0400 Subject: [PATCH 7/8] [SPARK-27931][SQL] add a negative test case for "o" --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index f15acb99b3b6a..65d13c9aad6f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -846,6 +846,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkCast("off", false) checkCast("of", false) + checkEvaluation(cast("o", BooleanType), null) checkEvaluation(cast("abc", BooleanType), null) checkEvaluation(cast("", BooleanType), null) } From b78748379992e37f6d7f550265451e549e0c5cc0 Mon Sep 17 00:00:00 2001 From: younggyu chun Date: Fri, 30 Aug 2019 14:32:20 -0400 Subject: [PATCH 8/8] removed "[SPARK-27931] Trim the string when cast string type to boolean type" comment. --- .../src/test/resources/sql-tests/inputs/pgSQL/boolean.sql | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql index 4427d76f48d80..178823bcfe9d6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql @@ -22,7 +22,6 @@ SELECT false AS `false`; SELECT boolean('t') AS true; --- [SPARK-27931] Trim the string when cast string type to boolean type SELECT boolean(' f ') AS `false`; SELECT boolean('true') AS true; @@ -49,12 +48,10 @@ SELECT boolean('no') AS `false`; -- [SPARK-27923] PostgreSQL does not accept 'nay' but Spark SQL accepts it and sets it to NULL SELECT boolean('nay') AS error; --- [SPARK-27931] Accept 'on' and 'off' as input for boolean data type SELECT boolean('on') AS true; SELECT boolean('off') AS `false`; --- [SPARK-27931] Accept unique prefixes thereof SELECT boolean('of') AS `false`; -- [SPARK-27923] PostgreSQL does not accept 'o' but Spark SQL accepts it and sets it to NULL @@ -101,7 +98,7 @@ SELECT boolean('f') <= boolean('t') AS true; -- explicit casts to/from text SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false`; --- [SPARK-27931] Trim the string when cast to boolean type + SELECT boolean(string(' true ')) AS true, boolean(string(' FALSE')) AS `false`; SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false`;