|
5 | 5 | using System.Buffers; |
6 | 6 | using System.Buffers.Text; |
7 | 7 | using System.Diagnostics; |
| 8 | +using System.Numerics; |
8 | 9 | using System.Runtime.CompilerServices; |
9 | 10 | using System.Runtime.InteropServices; |
10 | 11 | using System.Text.Encodings.Web; |
11 | 12 |
|
| 13 | +#if BUILDING_INBOX_LIBRARY |
| 14 | +using System.Runtime.Intrinsics; |
| 15 | +using System.Runtime.Intrinsics.X86; |
| 16 | +#endif |
| 17 | + |
12 | 18 | namespace System.Text.Json |
13 | 19 | { |
14 | 20 | // TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509 |
@@ -55,57 +61,202 @@ internal static partial class JsonWriterHelper |
55 | 61 | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
56 | 62 | private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0; |
57 | 63 |
|
58 | | - public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder) |
| 64 | +#if BUILDING_INBOX_LIBRARY |
| 65 | + private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' ' |
| 66 | + |
| 67 | + private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"' |
| 68 | + private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&' |
| 69 | + private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe ''' |
| 70 | + private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+' |
| 71 | + private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<' |
| 72 | + private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>' |
| 73 | + private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\' |
| 74 | + private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Accent '`' |
| 75 | + |
| 76 | + private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~' |
| 77 | + |
| 78 | + private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' ' |
| 79 | + |
| 80 | + private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"' |
| 81 | + private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&' |
| 82 | + private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe ''' |
| 83 | + private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+' |
| 84 | + private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<' |
| 85 | + private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>' |
| 86 | + private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\' |
| 87 | + private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Accent '`' |
| 88 | + |
| 89 | + [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| 90 | + private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue) |
59 | 91 | { |
60 | | - int idx; |
| 92 | + Debug.Assert(Sse2.IsSupported); |
61 | 93 |
|
62 | | - if (encoder != null) |
63 | | - { |
64 | | - idx = encoder.FindFirstCharacterToEncodeUtf8(value); |
65 | | - goto Return; |
66 | | - } |
| 94 | + Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range |
| 95 | + |
| 96 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"' |
| 97 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&' |
| 98 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe ''' |
| 99 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+' |
| 100 | + |
| 101 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<' |
| 102 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>' |
| 103 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\' |
| 104 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Accent '`' |
| 105 | + |
| 106 | + mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range |
| 107 | + |
| 108 | + return mask; |
| 109 | + } |
| 110 | + |
| 111 | + [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| 112 | + private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue) |
| 113 | + { |
| 114 | + Debug.Assert(Sse2.IsSupported); |
67 | 115 |
|
68 | | - for (idx = 0; idx < value.Length; idx++) |
| 116 | + Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and any byte >= 0x80 since those are negative as sbyte (sbyte.MaxValue is 0x7F); 0x7F itself is not matched here |
| 117 | + |
| 118 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark " |
| 119 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand & |
| 120 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe ' |
| 121 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign + |
| 122 | + |
| 123 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign < |
| 124 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign > |
| 125 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \ |
| 126 | + mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Accent ` |
| 127 | + |
| 128 | + return mask; |
| 129 | + } |
| 130 | +#endif |
| 131 | + |
| 132 | + public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder) |
| 133 | + { |
| 134 | + fixed (byte* ptr = value) |
69 | 135 | { |
70 | | - if (NeedsEscaping(value[idx])) |
| 136 | + int idx = 0; |
| 137 | + |
| 138 | + if (encoder != null) |
71 | 139 | { |
| 140 | + idx = encoder.FindFirstCharacterToEncodeUtf8(value); |
72 | 141 | goto Return; |
73 | 142 | } |
74 | | - } |
75 | 143 |
|
76 | | - idx = -1; // all characters allowed |
| 144 | +#if BUILDING_INBOX_LIBRARY |
| 145 | + if (Sse2.IsSupported) |
| 146 | + { |
| 147 | + sbyte* startingAddress = (sbyte*)ptr; |
| 148 | + while (value.Length - 16 >= idx) |
| 149 | + { |
| 150 | + Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16)); |
| 151 | + |
| 152 | + // Load the next 16 bytes. |
| 153 | + Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress); |
| 154 | + |
| 155 | + // Check if any of the 16 bytes need to be escaped. |
| 156 | + Vector128<sbyte> mask = CreateEscapingMask(sourceValue); |
| 157 | + |
| 158 | + int index = Sse2.MoveMask(mask.AsByte()); |
| 159 | + // If index == 0, that means none of the 16 bytes needed to be escaped. |
| 160 | + // TrailingZeroCount is relatively expensive, avoid it if possible. |
| 161 | + if (index != 0) |
| 162 | + { |
| 163 | + // Found at least one byte that needs to be escaped, figure out the index of |
| 164 | + // the first one found that needed to be escaped within the 16 bytes. |
| 165 | + Debug.Assert(index > 0 && index <= 65_535); |
| 166 | + int tzc = BitOperations.TrailingZeroCount(index); |
| 167 | + Debug.Assert(tzc >= 0 && tzc <= 16); |
| 168 | + idx += tzc; |
| 169 | + goto Return; |
| 170 | + } |
| 171 | + idx += 16; |
| 172 | + startingAddress += 16; |
| 173 | + } |
| 174 | + |
| 175 | + // Process the remaining characters. |
| 176 | + Debug.Assert(value.Length - idx < 16); |
| 177 | + } |
| 178 | +#endif |
| 179 | + |
| 180 | + for (; idx < value.Length; idx++) |
| 181 | + { |
| 182 | + Debug.Assert((ptr + idx) <= (ptr + value.Length)); |
| 183 | + if (NeedsEscaping(*(ptr + idx))) |
| 184 | + { |
| 185 | + goto Return; |
| 186 | + } |
| 187 | + } |
77 | 188 |
|
78 | | - Return: |
79 | | - return idx; |
| 189 | + idx = -1; // all characters allowed |
| 190 | + |
| 191 | + Return: |
| 192 | + return idx; |
| 193 | + } |
80 | 194 | } |
81 | 195 |
|
82 | 196 | public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder) |
83 | 197 | { |
84 | | - int idx; |
85 | | - |
86 | | - // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept |
87 | | - // null pointers and gaurd against that. Hence, check up-front and fall down to return -1. |
88 | | - if (encoder != null && !value.IsEmpty) |
| 198 | + fixed (char* ptr = value) |
89 | 199 | { |
90 | | - fixed (char* ptr = value) |
| 200 | + int idx = 0; |
| 201 | + |
| 202 | + // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept |
| 203 | + // null pointers and guard against that. Hence, check up-front and fall through to return -1. |
| 204 | + if (encoder != null && !value.IsEmpty) |
91 | 205 | { |
92 | 206 | idx = encoder.FindFirstCharacterToEncode(ptr, value.Length); |
| 207 | + goto Return; |
93 | 208 | } |
94 | | - goto Return; |
95 | | - } |
96 | 209 |
|
97 | | - for (idx = 0; idx < value.Length; idx++) |
98 | | - { |
99 | | - if (NeedsEscaping(value[idx])) |
| 210 | +#if BUILDING_INBOX_LIBRARY |
| 211 | + if (Sse2.IsSupported) |
100 | 212 | { |
101 | | - goto Return; |
| 213 | + short* startingAddress = (short*)ptr; |
| 214 | + while (value.Length - 8 >= idx) |
| 215 | + { |
| 216 | + Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8)); |
| 217 | + |
| 218 | + // Load the next 8 characters. |
| 219 | + Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress); |
| 220 | + |
| 221 | + // Check if any of the 8 characters need to be escaped. |
| 222 | + Vector128<short> mask = CreateEscapingMask(sourceValue); |
| 223 | + |
| 224 | + int index = Sse2.MoveMask(mask.AsByte()); |
| 225 | + // If index == 0, that means none of the 8 characters needed to be escaped. |
| 226 | + // TrailingZeroCount is relatively expensive, avoid it if possible. |
| 227 | + if (index != 0) |
| 228 | + { |
| 229 | + // Found at least one character that needs to be escaped, figure out the index of |
| 230 | + // the first one found that needed to be escaped within the 8 characters. |
| 231 | + Debug.Assert(index > 0 && index <= 65_535); |
| 232 | + int tzc = BitOperations.TrailingZeroCount(index); |
| 233 | + Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16); |
| 234 | + idx += tzc >> 1; |
| 235 | + goto Return; |
| 236 | + } |
| 237 | + idx += 8; |
| 238 | + startingAddress += 8; |
| 239 | + } |
| 240 | + |
| 241 | + // Process the remaining characters. |
| 242 | + Debug.Assert(value.Length - idx < 8); |
| 243 | + } |
| 244 | +#endif |
| 245 | + |
| 246 | + for (; idx < value.Length; idx++) |
| 247 | + { |
| 248 | + Debug.Assert((ptr + idx) <= (ptr + value.Length)); |
| 249 | + if (NeedsEscaping(*(ptr + idx))) |
| 250 | + { |
| 251 | + goto Return; |
| 252 | + } |
102 | 253 | } |
103 | | - } |
104 | 254 |
|
105 | | - idx = -1; // all characters allowed |
| 255 | + idx = -1; // All characters are allowed. |
106 | 256 |
|
107 | | - Return: |
108 | | - return idx; |
| 257 | + Return: |
| 258 | + return idx; |
| 259 | + } |
109 | 260 | } |
110 | 261 |
|
111 | 262 | public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape) |
|
0 commit comments