-
Notifications
You must be signed in to change notification settings - Fork 4.9k
When encoder is null, use JavaScriptEncoder.Default to check for NeedsEscaping #42023
Changes from all commits
bad1c74
966cf0b
07295c9
e74a9a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,16 +5,9 @@ | |
| using System.Buffers; | ||
| using System.Buffers.Text; | ||
| using System.Diagnostics; | ||
| using System.Numerics; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
| using System.Runtime.CompilerServices; // Do not remove. Needed for Int32LsbToHexDigit when !BUILDING_INBOX_LIBRARY | ||
| using System.Text.Encodings.Web; | ||
|
|
||
| #if BUILDING_INBOX_LIBRARY | ||
| using System.Runtime.Intrinsics; | ||
| using System.Runtime.Intrinsics.X86; | ||
| #endif | ||
|
|
||
| namespace System.Text.Json | ||
| { | ||
| // TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509 | ||
|
|
@@ -58,204 +51,23 @@ internal static partial class JsonWriterHelper | |
|
|
||
| private static bool NeedsEscapingNoBoundsCheck(char value) => AllowList[value] == 0; | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0; | ||
|
|
||
| #if BUILDING_INBOX_LIBRARY | ||
| private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' ' | ||
|
|
||
| private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe ''' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\' | ||
| private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`' | ||
|
|
||
| private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~' | ||
|
|
||
| private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' ' | ||
|
|
||
| private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe ''' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\' | ||
| private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`' | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue) | ||
| { | ||
| Debug.Assert(Sse2.IsSupported); | ||
|
|
||
| Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range | ||
|
|
||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe ''' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+' | ||
|
|
||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`' | ||
|
|
||
| mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range | ||
|
|
||
| return mask; | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue) | ||
| public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder) | ||
| { | ||
| Debug.Assert(Sse2.IsSupported); | ||
|
|
||
| Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E | ||
|
|
||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark " | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand & | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe ' | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign + | ||
|
|
||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign < | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign > | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \ | ||
| mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access ` | ||
|
|
||
| return mask; | ||
| return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncodeUtf8(value); | ||
| } | ||
| #endif | ||
|
|
||
| public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder) | ||
| public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder) | ||
| { | ||
| fixed (byte* ptr = value) | ||
| // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept | ||
| // null pointers and gaurd against that. Hence, check up-front to return -1. | ||
| if (value.IsEmpty) | ||
| { | ||
| int idx = 0; | ||
|
|
||
| if (encoder != null) | ||
| { | ||
| idx = encoder.FindFirstCharacterToEncodeUtf8(value); | ||
| goto Return; | ||
| } | ||
|
|
||
| #if BUILDING_INBOX_LIBRARY | ||
| if (Sse2.IsSupported) | ||
| { | ||
| sbyte* startingAddress = (sbyte*)ptr; | ||
| while (value.Length - 16 >= idx) | ||
| { | ||
| Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16)); | ||
|
|
||
| // Load the next 16 bytes. | ||
| Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress); | ||
|
|
||
| // Check if any of the 16 bytes need to be escaped. | ||
| Vector128<sbyte> mask = CreateEscapingMask(sourceValue); | ||
|
|
||
| int index = Sse2.MoveMask(mask.AsByte()); | ||
| // If index == 0, that means none of the 16 bytes needed to be escaped. | ||
| // TrailingZeroCount is relatively expensive, avoid it if possible. | ||
| if (index != 0) | ||
| { | ||
| // Found at least one byte that needs to be escaped, figure out the index of | ||
| // the first one found that needed to be escaped within the 16 bytes. | ||
| Debug.Assert(index > 0 && index <= 65_535); | ||
| int tzc = BitOperations.TrailingZeroCount(index); | ||
| Debug.Assert(tzc >= 0 && tzc <= 16); | ||
| idx += tzc; | ||
| goto Return; | ||
| } | ||
| idx += 16; | ||
| startingAddress += 16; | ||
| } | ||
|
|
||
| // Process the remaining characters. | ||
| Debug.Assert(value.Length - idx < 16); | ||
| } | ||
| #endif | ||
|
|
||
| for (; idx < value.Length; idx++) | ||
| { | ||
| Debug.Assert((ptr + idx) <= (ptr + value.Length)); | ||
| if (NeedsEscaping(*(ptr + idx))) | ||
| { | ||
| goto Return; | ||
| } | ||
| } | ||
|
|
||
| idx = -1; // all characters allowed | ||
|
|
||
| Return: | ||
| return idx; | ||
| return -1; | ||
| } | ||
| } | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not much perf benefit on its own, but we do produce smaller disassembly without having multiple return points in a fixed block, which could help with inlining. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. really? it seems like the difference would be a
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Never mind, re-checked the assembly diff, and removing the goto is better in terms of asm size. // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
// null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
if (value.IsEmpty)
{
return -1; // All characters are allowed.
}
fixed (char* ptr = value)
{
return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length);
}versus public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
{
fixed (char* ptr = value)
{
int idx = 0;
// Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
// null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
if (value.IsEmpty)
{
idx = -1; // All characters are allowed.
goto Return;
}
idx = (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length);
Return:
return idx;
}
}See diff (3 fewer instructions total): |
||
| public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder) | ||
| { | ||
| fixed (char* ptr = value) | ||
| { | ||
| int idx = 0; | ||
|
|
||
| // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept | ||
| // null pointers and gaurd against that. Hence, check up-front and fall down to return -1. | ||
| if (encoder != null && !value.IsEmpty) | ||
| { | ||
| idx = encoder.FindFirstCharacterToEncode(ptr, value.Length); | ||
| goto Return; | ||
| } | ||
|
|
||
| #if BUILDING_INBOX_LIBRARY | ||
| if (Sse2.IsSupported) | ||
| { | ||
| short* startingAddress = (short*)ptr; | ||
| while (value.Length - 8 >= idx) | ||
| { | ||
| Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8)); | ||
|
|
||
| // Load the next 8 characters. | ||
| Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress); | ||
|
|
||
| // Check if any of the 8 characters need to be escaped. | ||
| Vector128<short> mask = CreateEscapingMask(sourceValue); | ||
|
|
||
| int index = Sse2.MoveMask(mask.AsByte()); | ||
| // If index == 0, that means none of the 8 characters needed to be escaped. | ||
| // TrailingZeroCount is relatively expensive, avoid it if possible. | ||
| if (index != 0) | ||
| { | ||
| // Found at least one character that needs to be escaped, figure out the index of | ||
| // the first one found that needed to be escaped within the 8 characters. | ||
| Debug.Assert(index > 0 && index <= 65_535); | ||
| int tzc = BitOperations.TrailingZeroCount(index); | ||
| Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16); | ||
| idx += tzc >> 1; | ||
| goto Return; | ||
| } | ||
| idx += 8; | ||
| startingAddress += 8; | ||
| } | ||
|
|
||
| // Process the remaining characters. | ||
| Debug.Assert(value.Length - idx < 8); | ||
| } | ||
| #endif | ||
|
|
||
| for (; idx < value.Length; idx++) | ||
| { | ||
| Debug.Assert((ptr + idx) <= (ptr + value.Length)); | ||
| if (NeedsEscaping(*(ptr + idx))) | ||
| { | ||
| goto Return; | ||
| } | ||
| } | ||
|
|
||
| idx = -1; // All characters are allowed. | ||
|
|
||
| Return: | ||
| return idx; | ||
| return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put into
#if !BUILDING_INBOX_LIBRARYto make this (code-) explicit?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea. I will do this separately (for master) since it is primarily a cosmetic change and I want to cherry-pick this change for 3.1 (was waiting on CI to be green for a while :p).