Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -542,12 +542,13 @@ private void buildJson(JsonParser parser) throws IOException {
}

// Choose the smallest unsigned integer type that can store `value`. It must be within
// `[0, U24_MAX]`.
// `[0, SIZE_LIMIT]`.
private int getIntegerSize(int value) {
assert value >= 0 && value <= U24_MAX;
assert value >= 0 && value <= SIZE_LIMIT;
if (value <= U8_MAX) return 1;
if (value <= U16_MAX) return 2;
return U24_SIZE;
if (value <= U24_MAX) return 3;
return 4;
}

private void parseFloatingPoint(JsonParser parser) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ public class VariantUtil {
public static final int U24_SIZE = 3;
public static final int U32_SIZE = 4;

// Both variant value and variant metadata need to be no longer than 16MiB.
public static final int SIZE_LIMIT = U24_MAX + 1;
// Both variant value and variant metadata need to be no longer than 128MiB.
public static final int SIZE_LIMIT = 128 * 1024 * 1024;

public static final int MAX_DECIMAL4_PRECISION = 9;
public static final int MAX_DECIMAL8_PRECISION = 18;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,9 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite {
checkException(json, "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
Map("badRecord" -> json, "failFastMode" -> "FAILFAST"))
}
for (json <- Seq("\"" + "a" * (16 * 1024 * 1024) + "\"",
(0 to 4 * 1024 * 1024).mkString("[", ",", "]"))) {
for (json <- Seq((0 to 32 * 1024 * 1024).mkString("[", ",", "]"))) {
checkException(json, "VARIANT_SIZE_LIMIT",
Map("sizeLimit" -> "16.0 MiB", "functionName" -> "`parse_json`"))
Map("sizeLimit" -> "128.0 MiB", "functionName" -> "`parse_json`"))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,21 +93,21 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
check(Array(primitiveHeader(INT1), 0), Array[Byte](3, 0, 0))
check(Array(primitiveHeader(INT1), 0), Array[Byte](2, 0, 0))

// Construct binary values that are over 1 << 24 bytes, but otherwise valid.
// Construct binary values that are over SIZE_LIMIT bytes, but otherwise valid.
val bigVersion = Array[Byte]((VERSION | (3 << 6)).toByte)
val a = Array.fill(1 << 24)('a'.toByte)
val a = Array.fill(SIZE_LIMIT)('a'.toByte)
val hugeMetadata = bigVersion ++ Array[Byte](2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1) ++
a ++ Array[Byte]('b')
check(Array(primitiveHeader(TRUE)), hugeMetadata, "VARIANT_CONSTRUCTOR_SIZE_LIMIT")

// The keys are 'aaa....' and 'b'. Values are "yyy..." and 'true'.
val y = Array.fill(1 << 24)('y'.toByte)
val y = Array.fill(SIZE_LIMIT)('y'.toByte)
val hugeObject = Array[Byte](objectHeader(true, 4, 4)) ++
/* size */ padded(Array(2), 4) ++
/* id list */ padded(Array(0, 1), 4) ++
// Second value starts at offset 5 + (1 << 24), which is `5001` little-endian. The last value
// is 1 byte, so the one-past-the-end value is `6001`
/* offset list */ Array[Byte](0, 0, 0, 0, 5, 0, 0, 1, 6, 0, 0, 1) ++
// Second value starts at offset 5 + (SIZE_LIMIT), which is `5008` little-endian. The last
// value is 1 byte, so the one-past-the-end value is `6008`
/* offset list */ Array[Byte](0, 0, 0, 0, 5, 0, 0, 8, 6, 0, 0, 8) ++
/* field data */ Array[Byte](primitiveHeader(LONG_STR), 0, 0, 0, 1) ++ y ++ Array[Byte](
primitiveHeader(TRUE)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession {
check("{1:2}", null)
check("{\"a\":1", null)
check("{\"a\":[a,b,c]}", null)
check("\"" + "a" * (16 * 1024 * 1024) + "\"", null)
check("\"" + "a" * (16 * 1024 * 1024) + "\"")
check("\"" + "a" * (128 * 1024 * 1024) + "\"", null)
}

test("to_json with nested variant") {
Expand Down