-
Notifications
You must be signed in to change notification settings - Fork 1.2k
[JSONSerialization] Use Int64 and UInt64 for integer parsing #1654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@swift-ci Please test |
|
@swift-ci please test |
| defer { int64EndPointer.deallocate() } | ||
| let int64Result = strtoll(startPointer, int64EndPointer, 10) | ||
| let int64Distance = startPointer.distance(to: int64EndPointer[0]!) | ||
| if int64Distance == doubleDistance { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe pointer distances are sufficient to tell whether the numeric values are equivalent. For instance, on my machine:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
double parseDouble(const char *string, ptrdiff_t *length)
{
char *end = NULL;
double result = strtod(string, &end);
if (length) *length = end - string;
return result;
}
int64_t parseInt64(const char *string, ptrdiff_t *length)
{
char *end = NULL;
int64_t result = strtoll(string, &end, 10);
if (length) *length = end - string;
return result;
}
uint64_t parseUInt64(const char *string, ptrdiff_t *length)
{
char *end = NULL;
uint64_t result = strtoull(string, &end, 10);
if (length) *length = end - string;
return result;
}
int main(void)
{
const char *strings[] = {
"123.4",
"123.456",
"123",
"56",
"-9223372036854775808", // INT64_MIN
"9223372036854775807", // INT64_MAX
"18446744073709551615", // UINT64_MAX
};
for (size_t i = 0; i < sizeof(strings)/sizeof(strings[0]); i += 1) {
ptrdiff_t d_length = 0;
double d_result = parseDouble(strings[i], &d_length);
ptrdiff_t i_length = 0;
int64_t i_result = parseInt64(strings[i], &i_length);
ptrdiff_t u_length = 0;
uint64_t u_result = parseUInt64(strings[i], &u_length);
printf("%f (%ld)\t|\t%lld (%ld)\t|\t%llu (%ld)\n", d_result, d_length, i_result, i_length, u_result, u_length);
}
}produces
123.400000 (5) | 123 (3) | 123 (3)
123.456000 (7) | 123 (3) | 123 (3)
123.000000 (3) | 123 (3) | 123 (3)
56.000000 (2) | 56 (2) | 56 (2)
-9223372036854775808.000000 (20) | -9223372036854775808 (20) | 9223372036854775808 (20)
9223372036854775808.000000 (19) | 9223372036854775807 (19) | 9223372036854775807 (19)
18446744073709551616.000000 (20) | 9223372036854775807 (20) | 18446744073709551615 (20)
For UINT64_MAX, for instance, strtod, strtoll, and strtoull all read the same number of characters but produce very different results — only strtoull returns the correct results. This might've worked when we just had Double and Int, but no longer; we'll likely need to take a more involved approach here:
- Attempt
strtod. If the result is 0 and*doubleEndPointeris notNUL, bail out (failure to parse); if the result is ±HUGE_VALanderrnois set toERANGE(and wasn't before), it's too large to fit in aDouble, in which case bail - Attempt
strtoll. If the result is 0 and*int64EndPointeris notNUL, return theDoublevalue if parsed correctly (otherwise bail, failure to parse). If the result isLLONG_MAXorLLONG_MINanderrnois set toERANGEthen bail ifLLONG_MIN(since the value was negative and won't be parsed correctly bystrtoull). Also, if the string started with a-, return this int value as there's no point in parsing as unsigned - Attempt
strtoull. If the result is 0 and*uint64EndPointeris notNUL, bail; if the result isULLONG_MAXanderrnois set toERANGE, bail. - At this point, if
doubleDistanceis > thanint64Distanceanduint64Distance, return theDoublevalue; otherwise, theInt64value if it fit in anInt64; otherwise, theUInt64value
I don't remember if Swift has any good facilities for working with errno as it is allowed to be a macro IIRC but I can take a look. I'll sketch this out in C to clarify the intent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To illustrate, this is what I had in mind (rough code, no testing yet):
func parseTypedNumber(_ address: UnsafePointer<UInt8>, count: Int) -> (Any, IndexDistance)? {
let temp_buffer_size = 64
var temp_buffer = [Int8](repeating: 0, count: temp_buffer_size)
return temp_buffer.withUnsafeMutableBufferPointer { (buffer: inout UnsafeMutableBufferPointer<Int8>) -> (Any, IndexDistance)? in
memcpy(buffer.baseAddress!, address, min(count, temp_buffer_size - 1)) // Ensure null termination
func parseDouble(_ buffer: UnsafePointer<Int8>) -> (result: Double, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtod(buffer, &end)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && result.magnitude == .greatestFiniteMagnitude
return (result, buffer.distance(to: end!), overflow)
}
func parseInt64(_ buffer: UnsafePointer<Int8>) -> (result: Int64, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtoll(buffer, &end, 10)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && (result == .max || result == .min)
return (result, buffer.distance(to: end!), overflow)
}
func parseUInt64(_ buffer: UnsafePointer<Int8>) -> (result: UInt64, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtoull(buffer, &end, 10)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && result == .max
return (result, buffer.distance(to: end!), overflow)
}
// If we can't parse as a Double or if the value would overflow a Double, there's no point in parsing as an integer.
guard let parsedDouble = parseDouble(buffer.baseAddress!), !parsedDouble.overflow else {
return nil
}
guard let parsedInt64 = parseInt64(buffer.baseAddress!) else {
// If we couldn't parse as an Int64 but succeeded as a Double, we must have a Double.
return (NSNumber(value: parsedDouble.result), parsedDouble.length)
}
if parsedInt64.overflow {
// We have something that looks like an integer but didn't fit in an Int64.
// If it's not a negative number, it might be worth parsing as a UInt64.
guard buffer.baseAddress![0] != 0x2D /* minus */,
let parsedUInt64 = parseUInt64(buffer.baseAddress!),
!parsedUInt64.overflow else {
// Only representable as a Double:
// * The value was negative, or
// * It failed to parse as a UInt64, or
// * It was too large even for a UInt64.
return (NSNumber(value: parsedDouble.result), parsedDouble.length)
}
return (NSNumber(value: parsedUInt64.result), parsedUInt64.length)
}
// We succeeded in parsing in an Int64 without overflow. Good enough.
return (NSNumber(value: parsedInt64.result), parsedInt64.length)
}
}/cc @spevans
We can also introduce Decimal into the above to better represent Double values to preserve as much precision as possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the above code:
let strings = [
"123.4",
"123.456",
"123",
"56",
"-9223372036854775808", // INT64_MIN
"9223372036854775807", // INT64_MAX
"18446744073709551615", // UINT64_MAX
]
for string in strings {
string.withCString {
let length = strlen($0)
guard let (result, resultLength) = parseTypedNumber($0, count: length) else {
print("Failed to parse \(string)")
return
}
print("\(result) (\(Unicode.Scalar(UInt8((result as! NSNumber).objCType[0]))), \(resultLength))")
}
}yields
123.4 (d, 5)
123.456 (d, 7)
123 (q, 3)
56 (q, 2)
-9223372036854775808 (q, 20)
9223372036854775807 (q, 19)
18446744073709551615 (Q, 20)
So this might get us a bit closer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, yes, this is quite a bit more involved than I'd thought. In any case, though, to match Darwin behavior fully, we would ultimately need something like #1655. This was meant to be an incremental improvement in the meantime that, given my responsibility for #1634, I could feasibly contribute in some spare time. However, @spevans is much further along here and I'll go ahead and close this.
A follow-up to #1634 to expand the range of integers parsed to align behavior more closely with Darwin, as suggested by @itaiferber.
(There will still be residual differences in parsing between macOS and Linux owing to use of
Decimalin the former.)