Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,16 @@ object StringUtils extends Logging {
"(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines
}

private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)
// "true", "yes", "1", "false", "no", "0", and unique prefixes of these strings are accepted.
private[this] val trueStrings =
Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString)

private[this] val falseStrings =
Set("false", "fals", "fal", "fa", "f", "no", "n", "off", "of", "0").map(UTF8String.fromString)

// scalastyle:off caselocale
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim())
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase.trim())
// scalastyle:on caselocale

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,20 +819,34 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
}

test("cast string to boolean") {
checkCast("t", true)
checkCast("true", true)
checkCast("tru", true)
checkCast("tr", true)
checkCast("t", true)
checkCast("tRUe", true)
checkCast("y", true)
checkCast(" tRue ", true)
checkCast(" tRu ", true)
checkCast("yes", true)
checkCast("ye", true)
checkCast("y", true)
checkCast("1", true)
checkCast("on", true)

checkCast("f", false)
checkCast("false", false)
checkCast("FAlsE", false)
checkCast("n", false)
checkCast("fals", false)
checkCast("fal", false)
checkCast("fa", false)
checkCast("f", false)
checkCast(" fAlse ", false)
checkCast(" fAls ", false)
checkCast(" FAlsE ", false)
checkCast("no", false)
checkCast("n", false)
checkCast("0", false)
checkCast("off", false)
checkCast("of", false)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@younggyuchun, just for doubly sure, did you double check the behaviours against PostgreSQL?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon Here it is:
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not documented: https://www.postgresql.org/docs/devel/datatype-boolean.html

Postgres may support of for history reasons, I don't think we have to follow it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-fan @dongjoon-hyun @HyukjinKwon
This build accepts several unique prefixes for the boolean data type. For example, tru, tr, ye, fals, fal, fa and of, which are not documented. Do we want to not to accept these prefixes?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-fan . It's a documented feature in that document. We had better support it.

Unique prefixes of these strings are also accepted, for example t or n. Leading or trailing whitespace is ignored, and case does not matter.

cc @gatorsmile

Copy link
Member

@dongjoon-hyun dongjoon-hyun Aug 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, @younggyuchun . Please add a negative test case.
o is not supported because it's not unique. It's a common prefix for on and off.
It should be null.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems okay to me

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM


checkEvaluation(cast("o", BooleanType), null)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dongjoon-hyun
Add a negative test for "o"

checkEvaluation(cast("abc", BooleanType), null)
checkEvaluation(cast("", BooleanType), null)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ SELECT false AS `false`;

SELECT boolean('t') AS true;

-- [SPARK-27931] Trim the string when cast string type to boolean type
SELECT boolean(' f ') AS `false`;

SELECT boolean('true') AS true;
Expand All @@ -49,12 +48,10 @@ SELECT boolean('no') AS `false`;
-- [SPARK-27923] PostgreSQL does not accept 'nay' but Spark SQL accepts it and sets it to NULL
SELECT boolean('nay') AS error;

-- [SPARK-27931] Accept 'on' and 'off' as input for boolean data type
SELECT boolean('on') AS true;

SELECT boolean('off') AS `false`;

-- [SPARK-27931] Accept unique prefixes thereof
SELECT boolean('of') AS `false`;

-- [SPARK-27923] PostgreSQL does not accept 'o' but Spark SQL accepts it and sets it to NULL
Expand Down Expand Up @@ -101,7 +98,7 @@ SELECT boolean('f') <= boolean('t') AS true;

-- explicit casts to/from text
SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false`;
-- [SPARK-27931] Trim the string when cast to boolean type

SELECT boolean(string(' true ')) AS true,
boolean(string(' FALSE')) AS `false`;
SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ SELECT boolean(' f ') AS `false`
-- !query 4 schema
struct<false:boolean>
-- !query 4 output
NULL
false
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz remove the comments, too:

-- [SPARK-27931] Trim the string when cast string type to boolean type

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the four places to remove.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done Thank you @maropu



-- !query 5
Expand Down Expand Up @@ -127,23 +127,23 @@ SELECT boolean('on') AS true
-- !query 15 schema
struct<true:boolean>
-- !query 15 output
NULL
true


-- !query 16
SELECT boolean('off') AS `false`
-- !query 16 schema
struct<false:boolean>
-- !query 16 output
NULL
false


-- !query 17
SELECT boolean('of') AS `false`
-- !query 17 schema
struct<false:boolean>
-- !query 17 output
NULL
false


-- !query 18
Expand Down Expand Up @@ -296,7 +296,7 @@ SELECT boolean(string(' true ')) AS true,
-- !query 36 schema
struct<true:boolean,false:boolean>
-- !query 36 output
NULL NULL
true false


-- !query 37
Expand Down