Skip to content

Commit ac262cb

Browse files
cloud-fan authored and maropu committed
[SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction
### What changes were proposed in this pull request?
This is a followup of #26933.

A fractional string like "1.23" is definitely not a valid integral format, and we should fail the cast under ANSI mode.

### Why are the changes needed?
To correct the ANSI cast behavior from string to integral types.

### Does this PR introduce any user-facing change?
Yes, under ANSI mode, but ANSI mode is off by default.

### How was this patch tested?
New test.

Closes #27957 from cloud-fan/ansi.

Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Takeshi Yamamuro <[email protected]>
1 parent a177628 commit ac262cb

File tree

2 files changed

+16
-10
lines changed
  • common/unsafe/src/main/java/org/apache/spark/unsafe/types
  • sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions

2 files changed

+16
-10
lines changed

common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,6 +1105,10 @@ public static class IntWrapper implements Serializable {
11051105
* @return true if the parsing was successful else false
11061106
*/
11071107
public boolean toLong(LongWrapper toLongResult) {
1108+
return toLong(toLongResult, true);
1109+
}
1110+
1111+
private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) {
11081112
int offset = 0;
11091113
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
11101114
if (offset == this.numBytes) return false;
@@ -1129,7 +1133,7 @@ public boolean toLong(LongWrapper toLongResult) {
11291133
while (offset <= end) {
11301134
b = getByte(offset);
11311135
offset++;
1132-
if (b == separator) {
1136+
if (b == separator && allowDecimal) {
11331137
// We allow decimals and will return a truncated integral in that case.
11341138
// Therefore we won't throw an exception here (checking the fractional
11351139
// part happens below.)
@@ -1198,6 +1202,10 @@ public boolean toLong(LongWrapper toLongResult) {
11981202
* @return true if the parsing was successful else false
11991203
*/
12001204
public boolean toInt(IntWrapper intWrapper) {
1205+
return toInt(intWrapper, true);
1206+
}
1207+
1208+
private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) {
12011209
int offset = 0;
12021210
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
12031211
if (offset == this.numBytes) return false;
@@ -1222,7 +1230,7 @@ public boolean toInt(IntWrapper intWrapper) {
12221230
while (offset <= end) {
12231231
b = getByte(offset);
12241232
offset++;
1225-
if (b == separator) {
1233+
if (b == separator && allowDecimal) {
12261234
// We allow decimals and will return a truncated integral in that case.
12271235
// Therefore we won't throw an exception here (checking the fractional
12281236
// part happens below.)
@@ -1276,9 +1284,7 @@ public boolean toShort(IntWrapper intWrapper) {
12761284
if (toInt(intWrapper)) {
12771285
int intValue = intWrapper.value;
12781286
short result = (short) intValue;
1279-
if (result == intValue) {
1280-
return true;
1281-
}
1287+
return result == intValue;
12821288
}
12831289
return false;
12841290
}
@@ -1287,9 +1293,7 @@ public boolean toByte(IntWrapper intWrapper) {
12871293
if (toInt(intWrapper)) {
12881294
int intValue = intWrapper.value;
12891295
byte result = (byte) intValue;
1290-
if (result == intValue) {
1291-
return true;
1292-
}
1296+
return result == intValue;
12931297
}
12941298
return false;
12951299
}
@@ -1302,7 +1306,7 @@ public boolean toByte(IntWrapper intWrapper) {
13021306
*/
13031307
public long toLongExact() {
13041308
LongWrapper result = new LongWrapper();
1305-
if (toLong(result)) {
1309+
if (toLong(result, false)) {
13061310
return result.value;
13071311
}
13081312
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
@@ -1316,7 +1320,7 @@ public long toLongExact() {
13161320
*/
13171321
public int toIntExact() {
13181322
IntWrapper result = new IntWrapper();
1319-
if (toInt(result)) {
1323+
if (toInt(result, false)) {
13201324
return result.value;
13211325
}
13221326
throw new NumberFormatException("invalid input syntax for type numeric: " + this);

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase {
12871287
cast("123-string", dataType), "invalid input")
12881288
checkExceptionInExpression[NumberFormatException](
12891289
cast("2020-07-19", dataType), "invalid input")
1290+
checkExceptionInExpression[NumberFormatException](
1291+
cast("1.23", dataType), "invalid input")
12901292
}
12911293

12921294
Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType =>

0 commit comments

Comments
 (0)