Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/System.Text.Json/src/System.Text.Json.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@
<Reference Include="System.Resources.ResourceManager" />
<Reference Include="System.Runtime" />
<Reference Include="System.Runtime.Extensions" />
<Reference Include="System.Runtime.Intrinsics" />
<Reference Include="System.Text.Encoding.Extensions" />
</ItemGroup>
<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,9 @@
using System.Buffers;
using System.Buffers.Text;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.CompilerServices; // Do not remove. Needed for Int32LsbToHexDigit when !BUILDING_INBOX_LIBRARY
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when !BUILDING_INBOX_LIBRARY

Put into #if !BUILDING_INBOX_LIBRARY to make this (code-) explicit?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. I will do this separately (for master) since it is primarily a cosmetic change and I want to cherry-pick this change for 3.1 (was waiting on CI to be green for a while :p).

using System.Text.Encodings.Web;

#if BUILDING_INBOX_LIBRARY
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace System.Text.Json
{
// TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
Expand Down Expand Up @@ -58,204 +51,23 @@ internal static partial class JsonWriterHelper

private static bool NeedsEscapingNoBoundsCheck(char value) => AllowList[value] == 0;

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0;

#if BUILDING_INBOX_LIBRARY
private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '

private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`'

private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'

private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '

private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`'

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue)
{
Debug.Assert(Sse2.IsSupported);

Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`'

mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range

return mask;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue)
public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
{
Debug.Assert(Sse2.IsSupported);

Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access `

return mask;
return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncodeUtf8(value);
}
#endif

public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
{
fixed (byte* ptr = value)
// Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
// null pointers and gaurd against that. Hence, check up-front to return -1.
if (value.IsEmpty)
{
int idx = 0;

if (encoder != null)
{
idx = encoder.FindFirstCharacterToEncodeUtf8(value);
goto Return;
}

#if BUILDING_INBOX_LIBRARY
if (Sse2.IsSupported)
{
sbyte* startingAddress = (sbyte*)ptr;
while (value.Length - 16 >= idx)
{
Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16));

// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

// Check if any of the 16 bytes need to be escaped.
Vector128<sbyte> mask = CreateEscapingMask(sourceValue);

int index = Sse2.MoveMask(mask.AsByte());
// If index == 0, that means none of the 16 bytes needed to be escaped.
// TrailingZeroCount is relatively expensive, avoid it if possible.
if (index != 0)
{
// Found at least one byte that needs to be escaped, figure out the index of
// the first one found that needed to be escaped within the 16 bytes.
Debug.Assert(index > 0 && index <= 65_535);
int tzc = BitOperations.TrailingZeroCount(index);
Debug.Assert(tzc >= 0 && tzc <= 16);
idx += tzc;
goto Return;
}
idx += 16;
startingAddress += 16;
}

// Process the remaining characters.
Debug.Assert(value.Length - idx < 16);
}
#endif

for (; idx < value.Length; idx++)
{
Debug.Assert((ptr + idx) <= (ptr + value.Length));
if (NeedsEscaping(*(ptr + idx)))
{
goto Return;
}
}

idx = -1; // all characters allowed

Return:
return idx;
return -1;
}
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the goto getting us here now?

Copy link
Author

@ahsonkhan ahsonkhan Oct 23, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not much perf benefit on its own, but we do produce smaller disassembly without having multiple return points in a fixed block, which could help with inlining.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

really? it seems like the difference would be a jmp vs a ret; is there some stack space cleanup?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, re-checked the assembly diff, and removing the goto is better in terms of asm size.

// Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
// null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
if (value.IsEmpty)
{
    return -1; // All characters are allowed.
}

fixed (char* ptr = value)
{
    return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length);
}

versus

public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
{
    fixed (char* ptr = value)
    {
        int idx = 0;

        // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
        // null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
        if (value.IsEmpty)
        {
            idx = -1; // All characters are allowed.
            goto Return;
        }

        idx = (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length);

    Return:
        return idx;
    }
}

See diff (3 fewer instructions total):
https://www.diffchecker.com/uqTkCcBl

public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
{
fixed (char* ptr = value)
{
int idx = 0;

// Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
// null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
if (encoder != null && !value.IsEmpty)
{
idx = encoder.FindFirstCharacterToEncode(ptr, value.Length);
goto Return;
}

#if BUILDING_INBOX_LIBRARY
if (Sse2.IsSupported)
{
short* startingAddress = (short*)ptr;
while (value.Length - 8 >= idx)
{
Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8));

// Load the next 8 characters.
Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress);

// Check if any of the 8 characters need to be escaped.
Vector128<short> mask = CreateEscapingMask(sourceValue);

int index = Sse2.MoveMask(mask.AsByte());
// If index == 0, that means none of the 8 characters needed to be escaped.
// TrailingZeroCount is relatively expensive, avoid it if possible.
if (index != 0)
{
// Found at least one character that needs to be escaped, figure out the index of
// the first one found that needed to be escaped within the 8 characters.
Debug.Assert(index > 0 && index <= 65_535);
int tzc = BitOperations.TrailingZeroCount(index);
Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
idx += tzc >> 1;
goto Return;
}
idx += 8;
startingAddress += 8;
}

// Process the remaining characters.
Debug.Assert(value.Length - idx < 8);
}
#endif

for (; idx < value.Length; idx++)
{
Debug.Assert((ptr + idx) <= (ptr + value.Length));
if (NeedsEscaping(*(ptr + idx)))
{
goto Return;
}
}

idx = -1; // All characters are allowed.

Return:
return idx;
return (encoder ?? JavaScriptEncoder.Default).FindFirstCharacterToEncode(ptr, value.Length);
}
}

Expand Down