Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 7cae92b

Browse files
authored
Use Sse2 intrinsics to make NeedsEscaping check faster for large JSON strings (#41845)
* Use Sse2 intrinsics to make NeedsEscaping check faster for large strings. * Update the UTF-8 bytes NeedsEscaping and add tests. * Remove unnecessary bitwise OR and add more tests. * Add more tests around surrogates, invalid strings, and characters > short.MaxValue.
1 parent 4a7075f commit 7cae92b

File tree

3 files changed

+571
-29
lines changed

3 files changed

+571
-29
lines changed

src/System.Text.Json/src/System.Text.Json.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@
189189
<Reference Include="System.Resources.ResourceManager" />
190190
<Reference Include="System.Runtime" />
191191
<Reference Include="System.Runtime.Extensions" />
192+
<Reference Include="System.Runtime.Intrinsics" />
192193
<Reference Include="System.Text.Encoding.Extensions" />
193194
</ItemGroup>
194195
<ItemGroup>

src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs

Lines changed: 180 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
using System.Buffers;
66
using System.Buffers.Text;
77
using System.Diagnostics;
8+
using System.Numerics;
89
using System.Runtime.CompilerServices;
910
using System.Runtime.InteropServices;
1011
using System.Text.Encodings.Web;
1112

13+
#if BUILDING_INBOX_LIBRARY
14+
using System.Runtime.Intrinsics;
15+
using System.Runtime.Intrinsics.X86;
16+
#endif
17+
1218
namespace System.Text.Json
1319
{
1420
// TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
@@ -55,57 +61,202 @@ internal static partial class JsonWriterHelper
5561
[MethodImpl(MethodImplOptions.AggressiveInlining)]
5662
private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0;
5763

58-
public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
64+
#if BUILDING_INBOX_LIBRARY
65+
private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '
66+
67+
private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
68+
private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
69+
private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
70+
private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
71+
private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
72+
private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
73+
private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
74+
private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Accent '`'
75+
76+
private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'
77+
78+
private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '
79+
80+
private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
81+
private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
82+
private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
83+
private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
84+
private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
85+
private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
86+
private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
87+
private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Accent '`'
88+
89+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
90+
private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue)
5991
{
60-
int idx;
92+
Debug.Assert(Sse2.IsSupported);
6193

62-
if (encoder != null)
63-
{
64-
idx = encoder.FindFirstCharacterToEncodeUtf8(value);
65-
goto Return;
66-
}
94+
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range
95+
96+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
97+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
98+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
99+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
100+
101+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
102+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
103+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
104+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Accent '`'
105+
106+
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
107+
108+
return mask;
109+
}
110+
111+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
112+
private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue)
113+
{
114+
Debug.Assert(Sse2.IsSupported);
67115

68-
for (idx = 0; idx < value.Length; idx++)
116+
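Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters (< 0x20), and bytes 0x80-0xFF, which are negative when viewed as sbyte (sbyte.MaxValue is 0x7F). NOTE(review): 0x7F (DEL) is not flagged by any comparison in this mask - confirm that is intended.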
117+
118+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
119+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
120+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
121+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
122+
123+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
124+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
125+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
126+
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Accent `
127+
128+
return mask;
129+
}
130+
#endif
131+
132+
public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
133+
{
134+
fixed (byte* ptr = value)
69135
{
70-
if (NeedsEscaping(value[idx]))
136+
int idx = 0;
137+
138+
if (encoder != null)
71139
{
140+
idx = encoder.FindFirstCharacterToEncodeUtf8(value);
72141
goto Return;
73142
}
74-
}
75143

76-
idx = -1; // all characters allowed
144+
#if BUILDING_INBOX_LIBRARY
145+
if (Sse2.IsSupported)
146+
{
147+
sbyte* startingAddress = (sbyte*)ptr;
148+
while (value.Length - 16 >= idx)
149+
{
150+
Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16));
151+
152+
// Load the next 16 bytes.
153+
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
154+
155+
// Check if any of the 16 bytes need to be escaped.
156+
Vector128<sbyte> mask = CreateEscapingMask(sourceValue);
157+
158+
int index = Sse2.MoveMask(mask.AsByte());
159+
// If index == 0, that means none of the 16 bytes needed to be escaped.
160+
// TrailingZeroCount is relatively expensive, avoid it if possible.
161+
if (index != 0)
162+
{
163+
// Found at least one byte that needs to be escaped, figure out the index of
164+
// the first one found that needed to be escaped within the 16 bytes.
165+
Debug.Assert(index > 0 && index <= 65_535);
166+
int tzc = BitOperations.TrailingZeroCount(index);
167+
Debug.Assert(tzc >= 0 && tzc <= 16);
168+
idx += tzc;
169+
goto Return;
170+
}
171+
idx += 16;
172+
startingAddress += 16;
173+
}
174+
175+
// Process the remaining characters.
176+
Debug.Assert(value.Length - idx < 16);
177+
}
178+
#endif
179+
180+
for (; idx < value.Length; idx++)
181+
{
182+
Debug.Assert((ptr + idx) <= (ptr + value.Length));
183+
if (NeedsEscaping(*(ptr + idx)))
184+
{
185+
goto Return;
186+
}
187+
}
77188

78-
Return:
79-
return idx;
189+
idx = -1; // all characters allowed
190+
191+
Return:
192+
return idx;
193+
}
80194
}
81195

82196
public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
83197
{
84-
int idx;
85-
86-
// Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
87-
// null pointers and guard against that. Hence, check up-front and fall down to return -1.
88-
if (encoder != null && !value.IsEmpty)
198+
fixed (char* ptr = value)
89199
{
90-
fixed (char* ptr = value)
200+
int idx = 0;
201+
202+
// Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
203+
// null pointers and guard against that. Hence, check up-front and fall down to return -1.
204+
if (encoder != null && !value.IsEmpty)
91205
{
92206
idx = encoder.FindFirstCharacterToEncode(ptr, value.Length);
207+
goto Return;
93208
}
94-
goto Return;
95-
}
96209

97-
for (idx = 0; idx < value.Length; idx++)
98-
{
99-
if (NeedsEscaping(value[idx]))
210+
#if BUILDING_INBOX_LIBRARY
211+
if (Sse2.IsSupported)
100212
{
101-
goto Return;
213+
short* startingAddress = (short*)ptr;
214+
while (value.Length - 8 >= idx)
215+
{
216+
Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8));
217+
218+
// Load the next 8 characters.
219+
Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress);
220+
221+
// Check if any of the 8 characters need to be escaped.
222+
Vector128<short> mask = CreateEscapingMask(sourceValue);
223+
224+
int index = Sse2.MoveMask(mask.AsByte());
225+
// If index == 0, that means none of the 8 characters needed to be escaped.
226+
// TrailingZeroCount is relatively expensive, avoid it if possible.
227+
if (index != 0)
228+
{
229+
// Found at least one character that needs to be escaped, figure out the index of
230+
// the first one found that needed to be escaped within the 8 characters.
231+
Debug.Assert(index > 0 && index <= 65_535);
232+
int tzc = BitOperations.TrailingZeroCount(index);
233+
Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
234+
idx += tzc >> 1;
235+
goto Return;
236+
}
237+
idx += 8;
238+
startingAddress += 8;
239+
}
240+
241+
// Process the remaining characters.
242+
Debug.Assert(value.Length - idx < 8);
243+
}
244+
#endif
245+
246+
for (; idx < value.Length; idx++)
247+
{
248+
Debug.Assert((ptr + idx) <= (ptr + value.Length));
249+
if (NeedsEscaping(*(ptr + idx)))
250+
{
251+
goto Return;
252+
}
102253
}
103-
}
104254

105-
idx = -1; // all characters allowed
255+
idx = -1; // All characters are allowed.
106256

107-
Return:
108-
return idx;
257+
Return:
258+
return idx;
259+
}
109260
}
110261

111262
public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape)

0 commit comments

Comments
 (0)