From bad1c7477295679a1db1ba565c91e8538c697bd8 Mon Sep 17 00:00:00 2001 From: Ahson Khan Date: Tue, 22 Oct 2019 20:21:14 -0700 Subject: [PATCH 1/4] When encoder is null, use JavaScriptEncoder.Default to check for NeedsEscaping. --- .../src/System.Text.Json.csproj | 1 - .../Json/Writer/JsonWriterHelper.Escaping.cs | 194 +----------------- 2 files changed, 8 insertions(+), 187 deletions(-) diff --git a/src/System.Text.Json/src/System.Text.Json.csproj b/src/System.Text.Json/src/System.Text.Json.csproj index 45cb66c8039b..5eec31c1495c 100644 --- a/src/System.Text.Json/src/System.Text.Json.csproj +++ b/src/System.Text.Json/src/System.Text.Json.csproj @@ -189,7 +189,6 @@ - diff --git a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs index 5c6c27fb6346..525d8ed08bbe 100644 --- a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs +++ b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs @@ -5,16 +5,9 @@ using System.Buffers; using System.Buffers.Text; using System.Diagnostics; -using System.Numerics; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Text.Encodings.Web; -#if BUILDING_INBOX_LIBRARY -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif - namespace System.Text.Json { // TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509 @@ -58,139 +51,11 @@ internal static partial class JsonWriterHelper private static bool NeedsEscapingNoBoundsCheck(char value) => AllowList[value] == 0; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0; - -#if BUILDING_INBOX_LIBRARY - private static readonly Vector128 s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' ' - - private static readonly Vector128 s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"' - private static readonly Vector128 s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&' - private static readonly Vector128 s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe ''' - private static readonly Vector128 s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+' - private static readonly Vector128 s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<' - private static readonly Vector128 s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>' - private static readonly Vector128 s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\' - private static readonly Vector128 s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`' - - private static readonly Vector128 s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~' - - private static readonly Vector128 s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' ' - - private static readonly Vector128 s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"' - private static readonly Vector128 s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&' - private static readonly Vector128 s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe ''' - private static readonly Vector128 s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+' - private static readonly Vector128 s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<' - private static readonly Vector128 s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>' - private static readonly Vector128 s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\' - private static readonly Vector128 s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`' - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 CreateEscapingMask(Vector128 sourceValue) - { - Debug.Assert(Sse2.IsSupported); - - Vector128 mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range - - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe ''' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+' - - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`' - - mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range - - return mask; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 CreateEscapingMask(Vector128 sourceValue) - { - Debug.Assert(Sse2.IsSupported); - - Vector128 mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E - - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark " - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand & - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe ' - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign + - - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign < - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign > - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \ - mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access ` - - return mask; - } -#endif - public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) { - fixed (byte* ptr = value) - { - int idx = 0; - - if (encoder != null) - { - idx = encoder.FindFirstCharacterToEncodeUtf8(value); - goto Return; - } - -#if BUILDING_INBOX_LIBRARY - if (Sse2.IsSupported) - { - sbyte* startingAddress = (sbyte*)ptr; - while (value.Length - 16 >= idx) - { - Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16)); - - // Load the next 16 bytes. - Vector128 sourceValue = Sse2.LoadVector128(startingAddress); - - // Check if any of the 16 bytes need to be escaped. - Vector128 mask = CreateEscapingMask(sourceValue); - - int index = Sse2.MoveMask(mask.AsByte()); - // If index == 0, that means none of the 16 bytes needed to be escaped. - // TrailingZeroCount is relatively expensive, avoid it if possible. - if (index != 0) - { - // Found at least one byte that needs to be escaped, figure out the index of - // the first one found that needed to be escaped within the 16 bytes. - Debug.Assert(index > 0 && index <= 65_535); - int tzc = BitOperations.TrailingZeroCount(index); - Debug.Assert(tzc >= 0 && tzc <= 16); - idx += tzc; - goto Return; - } - idx += 16; - startingAddress += 16; - } - - // Process the remaining characters. - Debug.Assert(value.Length - idx < 16); - } -#endif - - for (; idx < value.Length; idx++) - { - Debug.Assert((ptr + idx) <= (ptr + value.Length)); - if (NeedsEscaping(*(ptr + idx))) - { - goto Return; - } - } - - idx = -1; // all characters allowed - - Return: - return idx; - } + return encoder == null ? + JavaScriptEncoder.Default.FindFirstCharacterToEncodeUtf8(value) : + encoder.FindFirstCharacterToEncodeUtf8(value); } public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) @@ -201,58 +66,15 @@ public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncod // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept // null pointers and gaurd against that. Hence, check up-front and fall down to return -1. - if (encoder != null && !value.IsEmpty) + if (value.IsEmpty) { - idx = encoder.FindFirstCharacterToEncode(ptr, value.Length); + idx = -1; // All characters are allowed. goto Return; } -#if BUILDING_INBOX_LIBRARY - if (Sse2.IsSupported) - { - short* startingAddress = (short*)ptr; - while (value.Length - 8 >= idx) - { - Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8)); - - // Load the next 8 characters. - Vector128 sourceValue = Sse2.LoadVector128(startingAddress); - - // Check if any of the 8 characters need to be escaped. - Vector128 mask = CreateEscapingMask(sourceValue); - - int index = Sse2.MoveMask(mask.AsByte()); - // If index == 0, that means none of the 8 characters needed to be escaped. - // TrailingZeroCount is relatively expensive, avoid it if possible. - if (index != 0) - { - // Found at least one character that needs to be escaped, figure out the index of - // the first one found that needed to be escaped within the 8 characters. - Debug.Assert(index > 0 && index <= 65_535); - int tzc = BitOperations.TrailingZeroCount(index); - Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16); - idx += tzc >> 1; - goto Return; - } - idx += 8; - startingAddress += 8; - } - - // Process the remaining characters. - Debug.Assert(value.Length - idx < 8); - } -#endif - - for (; idx < value.Length; idx++) - { - Debug.Assert((ptr + idx) <= (ptr + value.Length)); - if (NeedsEscaping(*(ptr + idx))) - { - goto Return; - } - } - - idx = -1; // All characters are allowed. + idx = encoder == null ? + JavaScriptEncoder.Default.FindFirstCharacterToEncode(ptr, value.Length) : + encoder.FindFirstCharacterToEncode(ptr, value.Length); Return: return idx; From 966cf0bc344945874b46122d319d0f3cb92339bd Mon Sep 17 00:00:00 2001 From: Ahson Khan Date: Tue, 22 Oct 2019 20:31:57 -0700 Subject: [PATCH 2/4] Remove unnecessary unsafe keyword and add comment to using directive. --- .../src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs index 525d8ed08bbe..fb9156fa99ee 100644 --- a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs +++ b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs @@ -5,7 +5,7 @@ using System.Buffers; using System.Buffers.Text; using System.Diagnostics; -using System.Runtime.CompilerServices; +using System.Runtime.CompilerServices; // Do not remove. Needed for Int32LsbToHexDigit when !BUILDING_INBOX_LIBRARY using System.Text.Encodings.Web; namespace System.Text.Json @@ -51,7 +51,7 @@ internal static partial class JsonWriterHelper private static bool NeedsEscapingNoBoundsCheck(char value) => AllowList[value] == 0; - public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) + public static int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) { return encoder == null ? JavaScriptEncoder.Default.FindFirstCharacterToEncodeUtf8(value) : From 07295c925558e6256132e609dd5938d42412fb34 Mon Sep 17 00:00:00 2001 From: Ahson Khan Date: Tue, 22 Oct 2019 22:00:40 -0700 Subject: [PATCH 3/4] Address feedback. --- .../System/Text/Json/Writer/JsonWriterHelper.Escaping.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs index fb9156fa99ee..67690f7394b9 100644 --- a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs +++ b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs @@ -53,9 +53,7 @@ internal static partial class JsonWriterHelper public static int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) { - return encoder == null ? - JavaScriptEncoder.Default.FindFirstCharacterToEncodeUtf8(value) : - encoder.FindFirstCharacterToEncodeUtf8(value); + return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncodeUtf8(value); } public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) @@ -72,9 +70,7 @@ public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncod goto Return; } - idx = encoder == null ? - JavaScriptEncoder.Default.FindFirstCharacterToEncode(ptr, value.Length) : - encoder.FindFirstCharacterToEncode(ptr, value.Length); + idx = (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length); Return: return idx; From e74a9a42d8ed17bb7ad67aca972036e5c3273382 Mon Sep 17 00:00:00 2001 From: Ahson Khan Date: Tue, 22 Oct 2019 23:25:48 -0700 Subject: [PATCH 4/4] Remove gotos and move the IsEmpty check outside the fixed block. --- .../Json/Writer/JsonWriterHelper.Escaping.cs | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs index 67690f7394b9..2a5101310d65 100644 --- a/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs +++ b/src/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs @@ -58,22 +58,16 @@ public static int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder enco public static unsafe int NeedsEscaping(ReadOnlySpan value, JavaScriptEncoder encoder) { - fixed (char* ptr = value) + // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept + // null pointers and gaurd against that. Hence, check up-front to return -1. + if (value.IsEmpty) { - int idx = 0; - - // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept - // null pointers and gaurd against that. Hence, check up-front and fall down to return -1. - if (value.IsEmpty) - { - idx = -1; // All characters are allowed. - goto Return; - } - - idx = (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length); + return -1; + } - Return: - return idx; + fixed (char* ptr = value) + { + return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length); } }