diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 6510bacf55899..a14ae540f5056 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -65,12 +65,16 @@ object StringUtils extends Logging { "(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines } - private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString) - private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString) + // Accepts "true", "yes", "on", "1", "false", "no", "off", "0" and their unique prefixes. + private[this] val trueStrings = + Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString) + + private[this] val falseStrings = + Set("false", "fals", "fal", "fa", "f", "no", "n", "off", "of", "0").map(UTF8String.fromString) // scalastyle:off caselocale - def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase) - def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase) + def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim()) + def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase.trim()) // scalastyle:on caselocale /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1f9fa22d30e13..65d13c9aad6f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -819,20 +819,34 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } test("cast 
string to boolean") { - checkCast("t", true) checkCast("true", true) + checkCast("tru", true) + checkCast("tr", true) + checkCast("t", true) checkCast("tRUe", true) - checkCast("y", true) + checkCast(" tRue ", true) + checkCast(" tRu ", true) checkCast("yes", true) + checkCast("ye", true) + checkCast("y", true) checkCast("1", true) + checkCast("on", true) - checkCast("f", false) checkCast("false", false) - checkCast("FAlsE", false) - checkCast("n", false) + checkCast("fals", false) + checkCast("fal", false) + checkCast("fa", false) + checkCast("f", false) + checkCast(" fAlse ", false) + checkCast(" fAls ", false) + checkCast(" FAlsE ", false) checkCast("no", false) + checkCast("n", false) checkCast("0", false) + checkCast("off", false) + checkCast("of", false) + checkEvaluation(cast("o", BooleanType), null) checkEvaluation(cast("abc", BooleanType), null) checkEvaluation(cast("", BooleanType), null) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql index 4427d76f48d80..178823bcfe9d6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql @@ -22,7 +22,6 @@ SELECT false AS `false`; SELECT boolean('t') AS true; --- [SPARK-27931] Trim the string when cast string type to boolean type SELECT boolean(' f ') AS `false`; SELECT boolean('true') AS true; @@ -49,12 +48,10 @@ SELECT boolean('no') AS `false`; -- [SPARK-27923] PostgreSQL does not accept 'nay' but Spark SQL accepts it and sets it to NULL SELECT boolean('nay') AS error; --- [SPARK-27931] Accept 'on' and 'off' as input for boolean data type SELECT boolean('on') AS true; SELECT boolean('off') AS `false`; --- [SPARK-27931] Accept unique prefixes thereof SELECT boolean('of') AS `false`; -- [SPARK-27923] PostgreSQL does not accept 'o' but Spark SQL accepts it and sets it to NULL @@ -101,7 +98,7 @@ SELECT boolean('f') <= boolean('t') AS true; 
-- explicit casts to/from text SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false`; --- [SPARK-27931] Trim the string when cast to boolean type + SELECT boolean(string(' true ')) AS true, boolean(string(' FALSE')) AS `false`; SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false`; diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out index c7903c8a34ef4..203806d43368a 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out @@ -39,7 +39,7 @@ SELECT boolean(' f ') AS `false` -- !query 4 schema struct -- !query 4 output -NULL +false -- !query 5 @@ -127,7 +127,7 @@ SELECT boolean('on') AS true -- !query 15 schema struct -- !query 15 output -NULL +true -- !query 16 @@ -135,7 +135,7 @@ SELECT boolean('off') AS `false` -- !query 16 schema struct -- !query 16 output -NULL +false -- !query 17 @@ -143,7 +143,7 @@ SELECT boolean('of') AS `false` -- !query 17 schema struct -- !query 17 output -NULL +false -- !query 18 @@ -296,7 +296,7 @@ SELECT boolean(string(' true ')) AS true, -- !query 36 schema struct -- !query 36 output -NULL NULL +true false -- !query 37