diff --git a/src/System.Text.Encodings.Web/src/Configurations.props b/src/System.Text.Encodings.Web/src/Configurations.props
index 6360871dacff..36cef612f8ef 100644
--- a/src/System.Text.Encodings.Web/src/Configurations.props
+++ b/src/System.Text.Encodings.Web/src/Configurations.props
@@ -1,6 +1,7 @@
+ netcoreapp3.0;
netstandard2.1;
netstandard;
uap-Windows_NT;
diff --git a/src/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj b/src/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
index 98050657653d..b4796cc3ab9d 100644
--- a/src/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
+++ b/src/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj
@@ -3,12 +3,16 @@
{B7EDBF00-765A-48E8-B593-CD668288E274}
System.Text.Encodings.Web
true
- netstandard-Debug;netstandard-Release;netstandard2.1-Debug;netstandard2.1-Release;uap-Windows_NT-Debug;uap-Windows_NT-Release
+ netcoreapp3.0-Debug;netcoreapp3.0-Release;netstandard-Debug;netstandard-Release;netstandard2.1-Debug;netstandard2.1-Release;uap-Windows_NT-Debug;uap-Windows_NT-Release
+ $(DefineConstants);BUILDING_INBOX_LIBRARY
+
+
+
@@ -20,6 +24,9 @@
+
+
+
System\Text\UnicodeDebug.cs
@@ -37,4 +44,7 @@
+
+
+
\ No newline at end of file
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoder.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoder.cs
new file mode 100644
index 000000000000..e3adc3cc6144
--- /dev/null
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoder.cs
@@ -0,0 +1,271 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Text.Internal;
+using System.Text.Unicode;
+
+#if BUILDING_INBOX_LIBRARY
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace System.Text.Encodings.Web
+{
+ internal sealed class DefaultJavaScriptEncoder : JavaScriptEncoder
+ {
+ private readonly AllowedCharactersBitmap _allowedCharacters; // BMP scalars in this bitmap are reported by WillEncode as not needing escaping
+
+ private readonly int[] _asciiNeedsEscaping = new int[0x80]; // WillEncode cached per ASCII value by the constructor: 1 = escape, -1 = leave as-is
+
+ public DefaultJavaScriptEncoder(TextEncoderSettings filter)
+ {
+ if (filter == null)
+ {
+ throw new ArgumentNullException(nameof(filter));
+ }
+ // Start from the caller-supplied allowed set, then forbid everything that must always be escaped.
+ _allowedCharacters = filter.GetAllowedCharacters();
+
+ // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
+ // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
+ _allowedCharacters.ForbidUndefinedCharacters();
+
+ // Forbid characters that are special in HTML.
+ // Even though this is not an HTML encoder,
+ // it's unfortunately common for developers to
+ // forget to HTML-encode a string once it has been JS-encoded,
+ // so this offers extra protection.
+ DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
+
+ // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
+ // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
+ _allowedCharacters.ForbidCharacter('\\');
+
+ // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
+ _allowedCharacters.ForbidCharacter('`');
+ // Cache the final decision for every ASCII value so the UTF-8 scan can use a plain table lookup.
+ for (int i = 0; i < _asciiNeedsEscaping.Length; i++)
+ {
+ _asciiNeedsEscaping[i] = WillEncode(i) ? 1 : -1;
+ }
+ }
+
+ public DefaultJavaScriptEncoder(params UnicodeRange[] allowedRanges) : this(new TextEncoderSettings(allowedRanges))
+ { } // the ranges are wrapped in a TextEncoderSettings; all work happens in the settings-based constructor
+
+ // Reports whether the scalar must be escaped: every supplementary-plane value is,
+ // and a BMP value is unless the allowed-characters bitmap contains it.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override bool WillEncode(int unicodeScalar)
+ {
+ bool isSupplementary = UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar);
+
+ // Anything that is not supplementary is expected to fit in a single UTF-16 code unit.
+ Debug.Assert(isSupplementary || (unicodeScalar >= char.MinValue && unicodeScalar <= char.MaxValue));
+
+ return isSupplementary || !_allowedCharacters.IsUnicodeScalarAllowed(unicodeScalar);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
+ {
+ if (text == null)
+ {
+ throw new ArgumentNullException(nameof(text));
+ }
+ // The UTF-16 scan is delegated entirely to the allowed-characters bitmap.
+ return _allowedCharacters.FindFirstCharacterToEncode(text, textLength);
+ }
+
+ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan utf8Text)
+ { // Returns the byte offset of the first scalar that must be escaped (or that fails to decode), or -1 if there is none.
+ fixed (byte* ptr = utf8Text)
+ {
+ int idx = 0; // offset of the next unexamined byte; doubles as the return value once a hit is found
+
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
+ {
+ sbyte* startingAddress = (sbyte*)ptr;
+ while (utf8Text.Length - 16 >= idx)
+ {
+ Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + utf8Text.Length - 16));
+
+ // Load the next 16 bytes.
+ Vector128 sourceValue = Sse2.LoadVector128(startingAddress);
+ // The mask is only used to tell an all-ASCII block from one that contains non-ASCII bytes.
+ Vector128 mask = Sse2Helper.CreateAsciiMask(sourceValue);
+ int index = Sse2.MoveMask(mask);
+
+ if (index != 0)
+ {
+ // At least one of the following 16 bytes is non-ASCII.
+ // Walk the block one scalar at a time, decoding multi-byte sequences as they are met.
+ int processNextSixteen = idx + 16;
+ Debug.Assert(processNextSixteen <= utf8Text.Length);
+
+ while (idx < processNextSixteen)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));
+
+ if (UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
+ {
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1)
+ {
+ goto Return;
+ }
+ idx++;
+ }
+ else
+ {
+ OperationStatus opStatus = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint nextScalarValue, out int utf8BytesConsumedForScalar);
+ // A sequence that does not decode cleanly is reported at its starting offset, just like a scalar that must be escaped.
+ Debug.Assert(nextScalarValue <= int.MaxValue);
+ if (opStatus != OperationStatus.Done || WillEncode((int)nextScalarValue))
+ {
+ goto Return;
+ }
+
+ Debug.Assert(opStatus == OperationStatus.Done);
+ idx += utf8BytesConsumedForScalar; // may carry idx past processNextSixteen when the scalar straddles the block end
+ }
+ }
+ }
+ else // all 16 bytes are ASCII: table lookups only, short-circuiting at the first hit
+ {
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1)
+ {
+ goto Return; // idx already points at the byte whose lookup returned 1
+ }
+ idx++; // the chain above advanced idx only 15 times; step past the 16th byte
+ }
+ startingAddress = (sbyte*)ptr + idx; // recomputed from idx because the mixed path can advance by more than 16
+ }
+
+ // Process the remaining bytes.
+ Debug.Assert(utf8Text.Length - idx < 16);
+ }
+#endif
+ // Scalar loop: handles the tail left by the vector loop, or the whole input when that loop is compiled out or unsupported.
+ while (idx < utf8Text.Length)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));
+
+ if (UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
+ {
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1)
+ {
+ goto Return;
+ }
+ idx++;
+ }
+ else
+ {
+ OperationStatus opStatus = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint nextScalarValue, out int utf8BytesConsumedForScalar);
+ // Same rule as above: a decode failure or a scalar that must be escaped stops the scan at idx.
+ Debug.Assert(nextScalarValue <= int.MaxValue);
+ if (opStatus != OperationStatus.Done || WillEncode((int)nextScalarValue))
+ {
+ goto Return;
+ }
+
+ Debug.Assert(opStatus == OperationStatus.Done);
+ idx += utf8BytesConsumedForScalar;
+ }
+ }
+
+ idx = -1; // All bytes are allowed.
+
+ Return:
+ return idx;
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private int DoesAsciiNeedEncoding(byte value)
+ {
+ Debug.Assert(value <= 0x7F);
+ // Table lookup into the per-ASCII cache filled by the constructor; callers must pass ASCII values only.
+ int needsEscaping = _asciiNeedsEscaping[value];
+ // The table holds only 1 (escape) or -1 (leave as-is).
+ Debug.Assert(needsEscaping == 1 || needsEscaping == -1);
+
+ return needsEscaping;
+ }
+
+ // A single UTF-16 input char expands to at most 6 output chars: [input] U+FFFF -> [output] "\uFFFF".
+ // An astral code point is written as two such escapes, one per surrogate,
+ // so the longest form produced for any one scalar value is 12 chars.
+ public override int MaxOutputCharactersPerInputCharacter => 12; // "\uFFFF\uFFFF" is the longest encoded form
+
+ private static readonly char[] s_b = new char[] { '\\', 'b' }; // escape written for '\b'
+ private static readonly char[] s_t = new char[] { '\\', 't' }; // escape written for '\t'
+ private static readonly char[] s_n = new char[] { '\\', 'n' }; // escape written for '\n'
+ private static readonly char[] s_f = new char[] { '\\', 'f' }; // escape written for '\f'
+ private static readonly char[] s_r = new char[] { '\\', 'r' }; // escape written for '\r'
+ private static readonly char[] s_back = new char[] { '\\', '\\' }; // escape written for '\\'
+
+ // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
+ // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
+ // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
+ // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+ public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+ {
+ if (buffer == null)
+ {
+ throw new ArgumentNullException(nameof(buffer));
+ }
+ // ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
+ // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/"
+ // (in ECMA-262 this character is a NonEscape character); however, we
+ // don't encode SOLIDUS by default unless the caller has provided an
+ // explicit bitmap which does not contain it. In this case we'll assume
+ // that the caller didn't want a SOLIDUS written to the output at all,
+ // so it should be written using "\u002F" encoding.
+ // HTML-specific characters (including apostrophe and quotes) will
+ // be written out in "\uXXXX" form for defense-in-depth.
+ // See UnicodeEncoderBase ctor comments for more info.
+ // Scalars that need no escaping are handed to TryWriteScalarAsChar.
+ if (!WillEncode(unicodeScalar))
+ {
+ return TryWriteScalarAsChar(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+ // Six characters have a dedicated two-character escape; every other escaped scalar goes through the helper.
+ char[] toCopy;
+ switch (unicodeScalar)
+ {
+ case '\b': toCopy = s_b; break;
+ case '\t': toCopy = s_t; break;
+ case '\n': toCopy = s_n; break;
+ case '\f': toCopy = s_f; break;
+ case '\r': toCopy = s_r; break;
+ case '\\': toCopy = s_back; break;
+ default: return JavaScriptEncoderHelper.TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+ return TryCopyCharacters(toCopy, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+ }
+}
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoderBasicLatin.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoderBasicLatin.cs
new file mode 100644
index 000000000000..de1c466e308b
--- /dev/null
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoderBasicLatin.cs
@@ -0,0 +1,289 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Text.Internal;
+using System.Text.Unicode;
+
+#if BUILDING_INBOX_LIBRARY
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
+namespace System.Text.Encodings.Web
+{
+ internal sealed class DefaultJavaScriptEncoderBasicLatin : JavaScriptEncoder
+ {
+ internal static readonly DefaultJavaScriptEncoderBasicLatin s_singleton = new DefaultJavaScriptEncoderBasicLatin(); // the constructor is private, so this field is how other code obtains an instance
+
+ private DefaultJavaScriptEncoderBasicLatin()
+ {
+ var filter = new TextEncoderSettings(UnicodeRanges.BasicLatin);
+ // This bitmap is a local used only by the DEBUG checks below; the encoder's own lookups go through AllowList.
+ AllowedCharactersBitmap allowedCharacters = filter.GetAllowedCharacters();
+
+ // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
+ // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
+ allowedCharacters.ForbidUndefinedCharacters();
+
+ // Forbid characters that are special in HTML.
+ // Even though this is not an HTML encoder,
+ // it's unfortunately common for developers to
+ // forget to HTML-encode a string once it has been JS-encoded,
+ // so this offers extra protection.
+ DefaultHtmlEncoder.ForbidHtmlCharacters(allowedCharacters);
+
+ // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
+ // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
+ allowedCharacters.ForbidCharacter('\\');
+
+ // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
+ allowedCharacters.ForbidCharacter('`');
+
+#if DEBUG
+ // Verify that the hard-coded AllowList table matches the set of allowed characters computed with AllowedCharactersBitmap.
+ for (int i = 0; i < AllowList.Length; i++)
+ {
+ char ch = (char)i;
+ Debug.Assert((allowedCharacters.IsCharacterAllowed(ch) ? 1 : 0) == AllowList[ch]);
+ Debug.Assert(allowedCharacters.IsCharacterAllowed(ch) == !NeedsEscaping(ch));
+ }
+ for (int i = AllowList.Length; i <= char.MaxValue; i++) // every char beyond the table must be forbidden by both sources
+ {
+ char ch = (char)i;
+ Debug.Assert(!allowedCharacters.IsCharacterAllowed(ch));
+ Debug.Assert(NeedsEscaping(ch));
+ }
+#endif
+ }
+
+ // Reports whether the scalar must be escaped: every supplementary-plane value is,
+ // and a BMP value is whenever the char overload of NeedsEscaping says so.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override bool WillEncode(int unicodeScalar)
+ {
+ bool isSupplementary = UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar);
+
+ // Anything that is not supplementary is expected to fit in a single UTF-16 code unit.
+ Debug.Assert(isSupplementary || (unicodeScalar >= char.MinValue && unicodeScalar <= char.MaxValue));
+
+ return isSupplementary || NeedsEscaping((char)unicodeScalar);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
+ { // Returns the index of the first char for which NeedsEscaping is true, or -1 if there is none.
+ if (text == null)
+ {
+ throw new ArgumentNullException(nameof(text));
+ }
+
+ int idx = 0; // index of the next unexamined char; doubles as the return value once a hit is found
+
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
+ {
+ short* startingAddress = (short*)text;
+ while (textLength - 8 >= idx)
+ {
+ Debug.Assert(startingAddress >= text && startingAddress <= (text + textLength - 8));
+
+ // Load the next 8 characters.
+ Vector128 sourceValue = Sse2.LoadVector128(startingAddress);
+
+ // Check if any of the 8 characters need to be escaped.
+ Vector128 mask = Sse2Helper.CreateEscapingMask_DefaultJavaScriptEncoderBasicLatin(sourceValue);
+ // The mask is viewed as bytes, so MoveMask yields two adjacent bits per 16-bit character.
+ int index = Sse2.MoveMask(mask.AsByte());
+ // If index == 0, that means none of the 8 characters needed to be escaped.
+ // TrailingZeroCount is relatively expensive, avoid it if possible.
+ if (index != 0)
+ {
+ // Found at least one character that needs to be escaped, figure out the index of
+ // the first one found that needed to be escaped within the 8 characters.
+ Debug.Assert(index > 0 && index <= 65_535);
+ int tzc = BitOperations.TrailingZeroCount(index);
+ Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
+ idx += tzc >> 1; // two mask bits per character, so halve the bit position
+ goto Return;
+ }
+ idx += 8;
+ startingAddress += 8;
+ }
+
+ // Process the remaining characters.
+ Debug.Assert(textLength - idx < 8);
+ }
+#endif
+ // Scalar loop: handles the tail left by the vector loop, or the whole input when that loop is compiled out or unsupported.
+ for (; idx < textLength; idx++)
+ {
+ Debug.Assert((text + idx) <= (text + textLength));
+ if (NeedsEscaping(*(text + idx)))
+ {
+ goto Return;
+ }
+ }
+
+ idx = -1; // All characters are allowed.
+
+ Return:
+ return idx;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan utf8Text)
+ { // Returns the offset of the first byte for which NeedsEscaping is true, or -1 if there is none.
+ fixed (byte* ptr = utf8Text)
+ {
+ int idx = 0; // offset of the next unexamined byte; doubles as the return value once a hit is found
+
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
+ {
+ sbyte* startingAddress = (sbyte*)ptr;
+ while (utf8Text.Length - 16 >= idx)
+ {
+ Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + utf8Text.Length - 16));
+
+ // Load the next 16 bytes.
+ Vector128 sourceValue = Sse2.LoadVector128(startingAddress);
+
+ // Check if any of the 16 bytes need to be escaped.
+ Vector128 mask = Sse2Helper.CreateEscapingMask_DefaultJavaScriptEncoderBasicLatin(sourceValue);
+ // One mask bit per byte here, so the bit position is used directly as the byte offset within the block.
+ int index = Sse2.MoveMask(mask);
+ // If index == 0, that means none of the 16 bytes needed to be escaped.
+ // TrailingZeroCount is relatively expensive, avoid it if possible.
+ if (index != 0)
+ {
+ // Found at least one byte that needs to be escaped, figure out the index of
+ // the first one found that needed to be escaped within the 16 bytes.
+ int tzc = BitOperations.TrailingZeroCount(index);
+ Debug.Assert(tzc >= 0 && tzc <= 16);
+ idx += tzc;
+ goto Return;
+ }
+ idx += 16;
+ startingAddress += 16;
+ }
+
+ // Process the remaining bytes.
+ Debug.Assert(utf8Text.Length - idx < 16);
+ }
+#endif
+ // Scalar loop: no UTF-8 decoding is done; each byte is looked up in AllowList, where every entry from 0x80 up is 0.
+ for (; idx < utf8Text.Length; idx++)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));
+ if (NeedsEscaping(*(ptr + idx)))
+ {
+ goto Return;
+ }
+ }
+
+ idx = -1; // All bytes are allowed.
+
+ Return:
+ return idx;
+ }
+ }
+
+ // A single UTF-16 input char expands to at most 6 output chars: [input] U+FFFF -> [output] "\uFFFF".
+ // An astral code point is written as two such escapes, one per surrogate,
+ // so the longest form produced for any one scalar value is 12 chars.
+ public override int MaxOutputCharactersPerInputCharacter => 12; // "\uFFFF\uFFFF" is the longest encoded form
+
+ private static readonly char[] s_b = new char[] { '\\', 'b' }; // escape written for '\b'
+ private static readonly char[] s_t = new char[] { '\\', 't' }; // escape written for '\t'
+ private static readonly char[] s_n = new char[] { '\\', 'n' }; // escape written for '\n'
+ private static readonly char[] s_f = new char[] { '\\', 'f' }; // escape written for '\f'
+ private static readonly char[] s_r = new char[] { '\\', 'r' }; // escape written for '\r'
+ private static readonly char[] s_back = new char[] { '\\', '\\' }; // escape written for '\\'
+
+ // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
+ // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
+ // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
+ // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+ public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+ {
+ if (buffer == null)
+ {
+ throw new ArgumentNullException(nameof(buffer));
+ }
+ // ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
+ // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/"
+ // (in ECMA-262 this character is a NonEscape character); however, we
+ // don't encode SOLIDUS by default unless the caller has provided an
+ // explicit bitmap which does not contain it. In this case we'll assume
+ // that the caller didn't want a SOLIDUS written to the output at all,
+ // so it should be written using "\u002F" encoding.
+ // HTML-specific characters (including apostrophe and quotes) will
+ // be written out in "\uXXXX" form for defense-in-depth.
+ // See UnicodeEncoderBase ctor comments for more info.
+ // Scalars that need no escaping are handed to TryWriteScalarAsChar.
+ if (!WillEncode(unicodeScalar))
+ {
+ return TryWriteScalarAsChar(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+ // Six characters have a dedicated two-character escape; every other escaped scalar goes through the helper.
+ char[] toCopy;
+ switch (unicodeScalar)
+ {
+ case '\b':
+ toCopy = s_b;
+ break;
+ case '\t':
+ toCopy = s_t;
+ break;
+ case '\n':
+ toCopy = s_n;
+ break;
+ case '\f':
+ toCopy = s_f;
+ break;
+ case '\r':
+ toCopy = s_r;
+ break;
+ case '\\':
+ toCopy = s_back;
+ break;
+ default:
+ return JavaScriptEncoderHelper.TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+ return TryCopyCharacters(toCopy, buffer, bufferLength, out numberOfCharactersWritten);
+ }
+
+ private static ReadOnlySpan AllowList => new byte[byte.MaxValue + 1] // indexed by character value: 1 = may be written as-is, 0 = needs escaping
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0000..U+000F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0010..U+001F
+ 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, // U+0020..U+002F (0 at U+0022 '"', U+0026 '&', U+0027 ''', U+002B '+')
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // U+0030..U+003F (0 at U+003C '<', U+003E '>')
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0040..U+004F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // U+0050..U+005F (0 at U+005C '\')
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0060..U+006F (0 at U+0060 '`')
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // U+0070..U+007F (0 at U+007F)
+
+ // Entries for U+0080..U+00FF (all 0) are included so that the byte overload of NeedsEscaping can index with any byte value without a range check.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0080..U+008F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0090..U+009F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00A0..U+00AF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00B0..U+00BF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00C0..U+00CF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00D0..U+00DF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00E0..U+00EF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00F0..U+00FF
+ };
+
+ public const int LastAsciiCharacter = 0x7F;
+ // Byte overload: AllowList has an entry for every byte value, so the value indexes the table directly.
+ private static bool NeedsEscaping(byte value) => AllowList[value] == 0;
+ // Char overload: anything above LastAsciiCharacter is escaped without consulting the table.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0;
+ }
+}
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs
index 4c1ecc1cad21..8f20dc04e9db 100644
--- a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoder.cs
@@ -2,10 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System.ComponentModel;
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-using System.Text.Internal;
using System.Text.Unicode;
namespace System.Text.Encodings.Web
@@ -18,10 +14,7 @@ public abstract class JavaScriptEncoder : TextEncoder
///
/// Returns a default built-in instance of .
///
- public static JavaScriptEncoder Default
- {
- get { return DefaultJavaScriptEncoder.Singleton; }
- }
+ public static JavaScriptEncoder Default => DefaultJavaScriptEncoderBasicLatin.s_singleton; // always the same static instance
///
/// Returns a built-in instance of that is less strict about what gets encoded.
@@ -40,11 +33,8 @@ public static JavaScriptEncoder Default
///
/// Unlike the , this encoder instance allows some other characters to go through unescaped (for example, '+'), and hence must be used cautiously.
///
- ///
- public static JavaScriptEncoder UnsafeRelaxedJsonEscaping
- {
- get { return UnsafeRelaxedJavaScriptEncoder.s_singleton; }
- }
+ ///
+ public static JavaScriptEncoder UnsafeRelaxedJsonEscaping => UnsafeRelaxedJavaScriptEncoder.s_singleton; // always the same static instance
///
/// Creates a new instance of JavaScriptEncoder with provided settings.
@@ -67,166 +57,4 @@ public static JavaScriptEncoder Create(params UnicodeRange[] allowedRanges)
return new DefaultJavaScriptEncoder(allowedRanges);
}
}
-
- internal sealed class DefaultJavaScriptEncoder : JavaScriptEncoder
- {
- private AllowedCharactersBitmap _allowedCharacters;
-
- internal static readonly DefaultJavaScriptEncoder Singleton = new DefaultJavaScriptEncoder(new TextEncoderSettings(UnicodeRanges.BasicLatin));
-
- public DefaultJavaScriptEncoder(TextEncoderSettings filter)
- {
- if (filter == null)
- {
- throw new ArgumentNullException(nameof(filter));
- }
-
- _allowedCharacters = filter.GetAllowedCharacters();
-
- // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
- // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
- _allowedCharacters.ForbidUndefinedCharacters();
-
- // Forbid characters that are special in HTML.
- // Even though this is a not HTML encoder,
- // it's unfortunately common for developers to
- // forget to HTML-encode a string once it has been JS-encoded,
- // so this offers extra protection.
- DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
-
- // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
- // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
- _allowedCharacters.ForbidCharacter('\\');
-
- // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
- _allowedCharacters.ForbidCharacter('`');
- }
-
- public DefaultJavaScriptEncoder(params UnicodeRange[] allowedRanges) : this(new TextEncoderSettings(allowedRanges))
- { }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public override bool WillEncode(int unicodeScalar)
- {
- if (UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar)) return true;
- return !_allowedCharacters.IsUnicodeScalarAllowed(unicodeScalar);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public unsafe override int FindFirstCharacterToEncode(char* text, int textLength)
- {
- if (text == null)
- {
- throw new ArgumentNullException(nameof(text));
- }
-
- return _allowedCharacters.FindFirstCharacterToEncode(text, textLength);
- }
-
- // The worst case encoding is 6 output chars per input char: [input] U+FFFF -> [output] "\uFFFF"
- // We don't need to worry about astral code points since they're represented as encoded
- // surrogate pairs in the output.
- public override int MaxOutputCharactersPerInputCharacter
- {
- get { return 12; } // "\uFFFF\uFFFF" is the longest encoded form
- }
-
- static readonly char[] s_b = new char[] { '\\', 'b' };
- static readonly char[] s_t = new char[] { '\\', 't' };
- static readonly char[] s_n = new char[] { '\\', 'n' };
- static readonly char[] s_f = new char[] { '\\', 'f' };
- static readonly char[] s_r = new char[] { '\\', 'r' };
- static readonly char[] s_back = new char[] { '\\', '\\' };
-
- // Writes a scalar value as a JavaScript-escaped character (or sequence of characters).
- // See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
- // http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
- // http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
- public unsafe override bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
- {
- if (buffer == null)
- {
- throw new ArgumentNullException(nameof(buffer));
- }
- // ECMA-262 allows encoding U+000B as "\v", but ECMA-404 does not.
- // Both ECMA-262 and ECMA-404 allow encoding U+002F SOLIDUS as "\/"
- // (in ECMA-262 this character is a NonEscape character); however, we
- // don't encode SOLIDUS by default unless the caller has provided an
- // explicit bitmap which does not contain it. In this case we'll assume
- // that the caller didn't want a SOLIDUS written to the output at all,
- // so it should be written using "\u002F" encoding.
- // HTML-specific characters (including apostrophe and quotes) will
- // be written out as numeric entities for defense-in-depth.
- // See UnicodeEncoderBase ctor comments for more info.
-
- if (!WillEncode(unicodeScalar)) { return TryWriteScalarAsChar(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten); }
-
- char[] toCopy;
- switch (unicodeScalar)
- {
- case '\b': toCopy = s_b; break;
- case '\t': toCopy = s_t; break;
- case '\n': toCopy = s_n; break;
- case '\f': toCopy = s_f; break;
- case '\r': toCopy = s_r; break;
- case '\\': toCopy = s_back; break;
- default: return TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
- }
- return TryCopyCharacters(toCopy, buffer, bufferLength, out numberOfCharactersWritten);
- }
-
- private static unsafe bool TryWriteEncodedScalarAsNumericEntity(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
- {
- Debug.Assert(buffer != null && length >= 0);
-
- if (UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar))
- {
- // Convert this back to UTF-16 and write out both characters.
- char leadingSurrogate, trailingSurrogate;
- UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue(unicodeScalar, out leadingSurrogate, out trailingSurrogate);
- int leadingSurrogateCharactersWritten;
- if (TryWriteEncodedSingleCharacter(leadingSurrogate, buffer, length, out leadingSurrogateCharactersWritten) &&
- TryWriteEncodedSingleCharacter(trailingSurrogate, buffer + leadingSurrogateCharactersWritten, length - leadingSurrogateCharactersWritten, out numberOfCharactersWritten)
- )
- {
- numberOfCharactersWritten += leadingSurrogateCharactersWritten;
- return true;
- }
- else
- {
- numberOfCharactersWritten = 0;
- return false;
- }
- }
- else
- {
- // This is only a single character.
- return TryWriteEncodedSingleCharacter(unicodeScalar, buffer, length, out numberOfCharactersWritten);
- }
- }
-
- // Writes an encoded scalar value (in the BMP) as a JavaScript-escaped character.
- private static unsafe bool TryWriteEncodedSingleCharacter(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
- {
- Debug.Assert(buffer != null && length >= 0);
- Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar), "The incoming value should've been in the BMP.");
-
- if (length < 6)
- {
- numberOfCharactersWritten = 0;
- return false;
- }
-
- // Encode this as 6 chars "\uFFFF".
- *buffer = '\\'; buffer++;
- *buffer = 'u'; buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit(unicodeScalar >> 12); buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)((unicodeScalar >> 8) & 0xFU)); buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)((unicodeScalar >> 4) & 0xFU)); buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)(unicodeScalar & 0xFU)); buffer++;
-
- numberOfCharactersWritten = 6;
- return true;
- }
- }
}
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoderHelper.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoderHelper.cs
new file mode 100644
index 000000000000..7c8bffa4f54a
--- /dev/null
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/JavaScriptEncoderHelper.cs
@@ -0,0 +1,69 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Text.Unicode;
+
+namespace System.Text.Encodings.Web
+{
+ internal static class JavaScriptEncoderHelper
+ {
+ // Writes unicodeScalar in "\uXXXX" form: a single escape for a BMP value, or one escape per
+ // UTF-16 surrogate for a supplementary value. Returns false, with zero characters reported, on failure.
+ public static unsafe bool TryWriteEncodedScalarAsNumericEntity(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
+ {
+ Debug.Assert(buffer != null && length >= 0);
+
+ if (!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar))
+ {
+ // BMP value: one escape sequence is all that is needed.
+ return TryWriteEncodedSingleCharacter(unicodeScalar, buffer, length, out numberOfCharactersWritten);
+ }
+
+ // Supplementary value: recover the UTF-16 surrogate pair and escape each half in order.
+ UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue(unicodeScalar, out char highSurrogate, out char lowSurrogate);
+
+ if (!TryWriteEncodedSingleCharacter(highSurrogate, buffer, length, out int highWritten) ||
+ !TryWriteEncodedSingleCharacter(lowSurrogate, buffer + highWritten, length - highWritten, out int lowWritten))
+ {
+ // One of the halves did not fit; report nothing written even if the first half was already stored.
+ numberOfCharactersWritten = 0;
+ return false;
+ }
+
+ // Both halves were written back to back.
+ numberOfCharactersWritten = highWritten + lowWritten;
+ return true;
+ }
+
+ // Writes an encoded scalar value (in the BMP) as a JavaScript-escaped character
+ // of the form "\uXXXX". Nothing is written, and false is returned, when the
+ // destination has room for fewer than six characters.
+ private static unsafe bool TryWriteEncodedSingleCharacter(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
+ {
+ Debug.Assert(buffer != null && length >= 0);
+ Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar), "The incoming value should've been in the BMP.");
+
+ const int EscapeLength = 6; // '\', 'u' and four hex digits
+
+ if (length < EscapeLength)
+ {
+ // Not enough room for a complete escape sequence.
+ numberOfCharactersWritten = 0;
+ return false;
+ }
+
+ // Prefix first, then the four hex digits from the most significant nibble down.
+ buffer[0] = '\\';
+ buffer[1] = 'u';
+ buffer[2] = HexUtil.Int32LsbToHexDigit(unicodeScalar >> 12);
+ buffer[3] = HexUtil.Int32LsbToHexDigit((unicodeScalar >> 8) & 0xF);
+ buffer[4] = HexUtil.Int32LsbToHexDigit((unicodeScalar >> 4) & 0xF);
+ buffer[5] = HexUtil.Int32LsbToHexDigit(unicodeScalar & 0xF);
+
+ numberOfCharactersWritten = EscapeLength;
+ return true;
+ }
+ }
+}
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/Sse2Helper.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/Sse2Helper.cs
new file mode 100644
index 000000000000..6caebd3e10a6
--- /dev/null
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/Sse2Helper.cs
@@ -0,0 +1,127 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+ namespace System.Text.Encodings.Web
+ {
+     // SSE2 helpers that build per-element masks for the JavaScript encoders' vectorized fast paths.
+     // All comparisons are signed: elements with the top bit set (chars above short.MaxValue,
+     // bytes above sbyte.MaxValue) are negative and therefore "less than" every constant used below.
+     internal static class Sse2Helper
+     {
+         // UTF-16: flags control characters below 0x20, '"', '\' and chars above short.MaxValue.
+         // Chars in the range 0x7F..0x7FFF are not flagged by this mask.
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<short> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(Vector128<short> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Below space ' ', or negative
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
+
+             return mask;
+         }
+
+         // UTF-8: flags control characters below 0x20, '"', '\', DEL (0x7F) and every non-ASCII byte (0x80..0xFF).
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<sbyte> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(Vector128<sbyte> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Below space ' ', or negative (0x80..0xFF)
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark '"'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus '\'
+
+             // sbyte.MaxValue is 0x7F, so DEL (0x7F) is not negative and CreateAsciiMask lets it through as ASCII.
+             // It is the only sbyte value above '~' and is a control character the encoders escape, so flag it here.
+             mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_SByte_0x7E)); // Delete (0x7F)
+
+             return mask;
+         }
+
+         // UTF-16: the relaxed mask plus the extra Basic Latin characters listed below and everything above '~'.
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<short> CreateEscapingMask_DefaultJavaScriptEncoderBasicLatin(Vector128<short> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             Vector128<short> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
+
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Accent '`'
+
+             mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Above tilde '~': DEL and everything outside ASCII
+
+             return mask;
+         }
+
+         // UTF-8: the relaxed mask (which already covers DEL and non-ASCII bytes) plus the extra characters below.
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<sbyte> CreateEscapingMask_DefaultJavaScriptEncoderBasicLatin(Vector128<sbyte> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             Vector128<sbyte> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
+
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand '&'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '''
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign '+'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign '<'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign '>'
+             mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Accent '`'
+
+             return mask;
+         }
+
+         // UTF-16: flags every char outside 0x00..0x7E. DEL (0x7F) is deliberately reported as non-ASCII.
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<short> CreateAsciiMask(Vector128<short> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x00); // Negative, i.e. above short.MaxValue
+             mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Above tilde '~'
+
+             return mask;
+         }
+
+         // UTF-8: flags bytes 0x80..0xFF, which are negative when read as sbyte. DEL (0x7F) counts as ASCII here.
+         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+         public static Vector128<sbyte> CreateAsciiMask(Vector128<sbyte> sourceValue)
+         {
+             Debug.Assert(Sse2.IsSupported);
+             return Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x00);
+         }
+
+         // Comparison constants. The "UInt16" fields are built from short values, so their lanes are signed.
+         private static readonly Vector128<short> s_mask_UInt16_0x00 = Vector128<short>.Zero; // Null
+         private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '
+         private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
+         private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
+         private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
+         private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
+         private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
+         private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
+         private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
+         private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Accent '`'
+         private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'
+
+         private static readonly Vector128<sbyte> s_mask_SByte_0x00 = Vector128<sbyte>.Zero; // Null
+         private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '
+         private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
+         private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Accent '`'
+         private static readonly Vector128<sbyte> s_mask_SByte_0x7E = Vector128.Create((sbyte)0x7E); // Tilde '~'
+     }
+ }
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
index bdd09f24593b..eb2b50129ad4 100644
--- a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
@@ -10,6 +10,11 @@
using System.Runtime.InteropServices;
using System.Text.Unicode;
+#if BUILDING_INBOX_LIBRARY
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace System.Text.Encodings.Web
{
///
@@ -24,6 +29,8 @@ public abstract class TextEncoder
// Fast cache for Ascii
private byte[][] _asciiEscape = new byte[0x80][];
+ private readonly int[] _asciiNeedsEscaping = new int[0x80];
+
// Keep a reference to Array.Empty as this is used as a singleton for comparisons
// and there is no guarantee that Array.Empty() will always be the same instance.
private static readonly byte[] s_noEscape = Array.Empty();
@@ -693,46 +700,132 @@ private unsafe int FindFirstCharacterToEncode(ReadOnlySpan text)
/// current encoder instance, or -1 if no data in requires escaping.
///
[EditorBrowsable(EditorBrowsableState.Never)]
- public virtual int FindFirstCharacterToEncodeUtf8(ReadOnlySpan utf8Text)
+ public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan utf8Text)
{
- int originalUtf8TextLength = utf8Text.Length;
-
// Loop through the input text, terminating when we see ill-formed UTF-8 or when we decode a scalar value
// that must be encoded. If we see either of these things then we'll return its index in the original
// input sequence. If we consume the entire text without seeing either of these, return -1 to indicate
// that the text can be copied as-is without escaping.
- int i = 0;
- while (i < utf8Text.Length)
+ fixed (byte* ptr = utf8Text) // Pin the span so the loops below can index it through a raw pointer.
{
- byte value = utf8Text[i];
- if (UnicodeUtility.IsAsciiCodePoint(value))
+ int idx = 0; // Index of the next unexamined byte; doubles as the return value.
+
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
{
- if (!ReferenceEquals(GetAsciiEncoding(value), s_noEscape))
+ sbyte* startingAddress = (sbyte*)ptr; // Kept equal to ptr + idx at the top of every iteration.
+ while (utf8Text.Length - 16 >= idx) // A full 16-byte block is still available.
{
- return originalUtf8TextLength - utf8Text.Length + i;
+ Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + utf8Text.Length - 16));
+
+ // Load the next 16 bytes.
+ Vector128 sourceValue = Sse2.LoadVector128(startingAddress);
+
+ Vector128 mask = Sse2Helper.CreateAsciiMask(sourceValue);
+ int index = Sse2.MoveMask(mask); // Non-zero when CreateAsciiMask flagged at least one byte.
+
+ if (index != 0)
+ {
+ // At least one of the following 16 bytes is non-ASCII.
+
+ int processNextSixteen = idx + 16; // End (exclusive) of the block being walked byte by byte.
+ Debug.Assert(processNextSixteen <= utf8Text.Length);
+
+ while (idx < processNextSixteen)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));
+
+ if (UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
+ {
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1) // 1 means "must be encoded"; -1 means allowed.
+ {
+ goto Return;
+ }
+ idx++;
+ }
+ else
+ {
+ OperationStatus opStatus = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint nextScalarValue, out int utf8BytesConsumedForScalar);
+
+ Debug.Assert(nextScalarValue <= int.MaxValue);
+ if (opStatus != OperationStatus.Done || WillEncode((int)nextScalarValue))
+ {
+ goto Return;
+ }
+
+ Debug.Assert(opStatus == OperationStatus.Done);
+ idx += utf8BytesConsumedForScalar; // May move idx past processNextSixteen; the next block starts at the new idx.
+ }
+ }
+ }
+ else
+ {
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1 // All 16 bytes are ASCII: unrolled check that stops with idx on the first byte to encode.
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1
+ || DoesAsciiNeedEncoding(ptr[++idx]) == 1)
+ {
+ goto Return;
+ }
+ idx++; // Step past the 16th byte, which the chain above left idx pointing at.
+ }
+ startingAddress = (sbyte*)ptr + idx; // Re-sync the load address with idx for the next block.
}
- i++;
+ // Process the remaining bytes.
+ Debug.Assert(utf8Text.Length - idx < 16);
}
- else
+#endif
+
+ while (idx < utf8Text.Length) // Scalar path: the tail left by the vector loop, or the whole input when it did not run.
{
- if (i > 0)
- {
- utf8Text = utf8Text.Slice(i);
- }
+ Debug.Assert((ptr + idx) <= (ptr + utf8Text.Length));
- if (UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text, out uint nextScalarValue, out int bytesConsumedThisIteration) != OperationStatus.Done
- || WillEncode((int)nextScalarValue))
+ if (UnicodeUtility.IsAsciiCodePoint(ptr[idx]))
{
- return originalUtf8TextLength - utf8Text.Length;
+ if (DoesAsciiNeedEncoding(ptr[idx]) == 1) // 1 means "must be encoded"; -1 means allowed.
+ {
+ goto Return;
+ }
+ idx++;
}
+ else
+ {
+ OperationStatus opStatus = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(idx), out uint nextScalarValue, out int utf8BytesConsumedForScalar);
- i = bytesConsumedThisIteration;
+ Debug.Assert(nextScalarValue <= int.MaxValue);
+ if (opStatus != OperationStatus.Done || WillEncode((int)nextScalarValue))
+ {
+ goto Return;
+ }
+
+ Debug.Assert(opStatus == OperationStatus.Done);
+ idx += utf8BytesConsumedForScalar;
+ }
}
- }
- return -1; // no input data needs to be escaped
+ idx = -1; // All bytes are allowed.
+
+ Return:
+ return idx;
+ }
}
///
@@ -813,10 +906,26 @@ private byte[] GetAsciiEncoding(byte value)
_asciiEscape[value] = encoding;
}
}
-
return encoding;
}
+ // Cached WillEncode answer for an ASCII byte: 1 = encode, -1 = allowed (0 in the table = not computed yet).
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private int DoesAsciiNeedEncoding(byte value)
+ {
+     Debug.Assert(value <= 0x7F);
+
+     int cached = _asciiNeedsEscaping[value];
+     Debug.Assert(cached == 0 || cached == 1 || cached == -1);
+     if (cached != 0)
+     {
+         return cached;
+     }
+     int computed = WillEncode(value) ? 1 : -1;
+     _asciiNeedsEscaping[value] = computed;
+     return computed;
+ }
+
private static void ThrowArgumentException_MaxOutputCharsPerInputChar()
{
throw new ArgumentException("Argument encoder does not implement MaxOutputCharsPerInputChar correctly.");
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UnsafeRelaxedJavaScriptEncoder.cs b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UnsafeRelaxedJavaScriptEncoder.cs
index 7226d1760d07..a47afe1bf021 100644
--- a/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UnsafeRelaxedJavaScriptEncoder.cs
+++ b/src/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UnsafeRelaxedJavaScriptEncoder.cs
@@ -2,25 +2,29 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text.Internal;
using System.Text.Unicode;
+#if BUILDING_INBOX_LIBRARY
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace System.Text.Encodings.Web
{
internal sealed class UnsafeRelaxedJavaScriptEncoder : JavaScriptEncoder
{
private readonly AllowedCharactersBitmap _allowedCharacters;
- internal static readonly UnsafeRelaxedJavaScriptEncoder s_singleton = new UnsafeRelaxedJavaScriptEncoder(new TextEncoderSettings(UnicodeRanges.All));
+ internal static readonly UnsafeRelaxedJavaScriptEncoder s_singleton = new UnsafeRelaxedJavaScriptEncoder();
- private UnsafeRelaxedJavaScriptEncoder(TextEncoderSettings filter)
+ private UnsafeRelaxedJavaScriptEncoder()
{
- if (filter == null)
- {
- throw new ArgumentNullException(nameof(filter));
- }
+ var filter = new TextEncoderSettings(UnicodeRanges.All);
_allowedCharacters = filter.GetAllowedCharacters();
@@ -44,18 +48,209 @@ public override bool WillEncode(int unicodeScalar)
return true;
}
+ Debug.Assert(unicodeScalar >= char.MinValue && unicodeScalar <= char.MaxValue);
+
return !_allowedCharacters.IsUnicodeScalarAllowed(unicodeScalar);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public unsafe override int FindFirstCharacterToEncode(char* text, int textLength)
+ public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
{
if (text == null)
{
throw new ArgumentNullException(nameof(text));
}
- return _allowedCharacters.FindFirstCharacterToEncode(text, textLength);
+ int idx = 0; // Index of the next unexamined char; doubles as the return value.
+
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
+ {
+ short* startingAddress = (short*)text; // Kept equal to text + idx at the top of every iteration.
+ while (textLength - 8 >= idx) // A full block of 8 chars is still available.
+ {
+ Debug.Assert(startingAddress >= text && startingAddress <= (text + textLength - 8));
+
+ // Load the next 8 characters.
+ Vector128 sourceValue = Sse2.LoadVector128(startingAddress);
+
+ Vector128 mask = Sse2Helper.CreateAsciiMask(sourceValue);
+ int index = Sse2.MoveMask(mask.AsByte()); // Viewed as bytes, so each flagged char contributes two adjacent bits.
+
+ if (index != 0)
+ {
+ // At least one of the following 8 characters is non-ASCII.
+ int processNextEight = idx + 8;
+ Debug.Assert(processNextEight <= textLength);
+ for (; idx < processNextEight; idx++) // Check each of the 8 chars against the allowed-characters bitmap.
+ {
+ Debug.Assert((text + idx) <= (text + textLength));
+ if (!_allowedCharacters.IsCharacterAllowed(*(text + idx)))
+ {
+ goto Return;
+ }
+ }
+ startingAddress += 8;
+ }
+ else
+ {
+ // Check if any of the 8 characters need to be escaped.
+ mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
+
+ index = Sse2.MoveMask(mask.AsByte());
+ // If index == 0, that means none of the 8 characters needed to be escaped.
+ // TrailingZeroCount is relatively expensive, avoid it if possible.
+ if (index != 0)
+ {
+ // Found at least one character that needs to be escaped, figure out the index of
+ // the first one found that needed to be escaped within the 8 characters.
+ Debug.Assert(index > 0 && index <= 65_535);
+ int tzc = BitOperations.TrailingZeroCount(index);
+ Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
+ idx += tzc >> 1; // Two mask bits per char, so halve the bit position to get the char offset.
+ goto Return;
+ }
+ idx += 8;
+ startingAddress += 8;
+ }
+ }
+
+ // Process the remaining characters.
+ Debug.Assert(textLength - idx < 8);
+ }
+#endif
+
+ for (; idx < textLength; idx++) // Scalar path: the tail left by the vector loop, or the whole input when it did not run.
+ {
+ Debug.Assert((text + idx) <= (text + textLength));
+ if (!_allowedCharacters.IsCharacterAllowed(*(text + idx)))
+ {
+ goto Return;
+ }
+ }
+
+ idx = -1; // All characters are allowed.
+
+ Return:
+ return idx;
+ }
+
+ // Scans UTF-8 text for the first byte that cannot be copied to the output verbatim.
+ //
+ // Returns the index of the first byte that either belongs to a scalar value this encoder
+ // escapes or begins a sequence that does not decode successfully; returns -1 when every
+ // byte can be emitted as-is. With SSE2 (inbox builds only) the input is examined 16 bytes
+ // at a time, and whatever is left over is handled by the scalar loop at the end.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan utf8Text)
+ {
+     fixed (byte* pText = utf8Text)
+     {
+         int length = utf8Text.Length;
+         int pos = 0;
+
+#if BUILDING_INBOX_LIBRARY
+         if (Sse2.IsSupported)
+         {
+             // pBlock always equals pText + pos; the loop increment re-derives it after each block.
+             for (sbyte* pBlock = (sbyte*)pText; pos <= length - 16; pBlock = (sbyte*)pText + pos)
+             {
+                 Debug.Assert(pBlock >= pText && pBlock <= (pText + length - 16));
+
+                 // Load the next 16 bytes.
+                 var block = Sse2.LoadVector128(pBlock);
+
+                 if (Sse2.MoveMask(Sse2Helper.CreateAsciiMask(block)) != 0)
+                 {
+                     // The block holds non-ASCII data: fall back to decoding it one scalar at a time.
+                     // A multi-byte sequence may carry pos beyond blockEnd; the next block starts there.
+                     int blockEnd = pos + 16;
+                     Debug.Assert(blockEnd <= length);
+
+                     while (pos < blockEnd)
+                     {
+                         Debug.Assert((pText + pos) <= (pText + length));
+
+                         byte current = pText[pos];
+                         if (UnicodeUtility.IsAsciiCodePoint(current))
+                         {
+                             if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
+                             {
+                                 return pos;
+                             }
+
+                             pos++;
+                             continue;
+                         }
+
+                         // Not an ASCII byte: decode the scalar value that starts at this position.
+                         OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(pos), out uint scalar, out int consumed);
+                         Debug.Assert(scalar <= int.MaxValue);
+
+                         if (status != OperationStatus.Done || WillEncode((int)scalar))
+                         {
+                             return pos;
+                         }
+
+                         Debug.Assert(status == OperationStatus.Done);
+                         pos += consumed;
+                     }
+
+                     continue;
+                 }
+
+                 // Pure ASCII block: a second mask marks the bytes the relaxed encoder escapes.
+                 int escapeBits = Sse2.MoveMask(Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(block));
+                 if (escapeBits != 0)
+                 {
+                     // MoveMask yields one bit per byte, so the lowest set bit is the offset of the
+                     // first flagged byte. TrailingZeroCount is only paid for when something was found.
+                     Debug.Assert(escapeBits > 0 && escapeBits <= 65_535);
+                     int offset = BitOperations.TrailingZeroCount(escapeBits);
+                     Debug.Assert(offset >= 0 && offset <= 16);
+                     return pos + offset;
+                 }
+
+                 pos += 16;
+             }
+
+             // Fewer than 16 bytes are left for the scalar loop below.
+             Debug.Assert(length - pos < 16);
+         }
+#endif
+
+         // Scalar path: the tail left by the vector loop, or the whole input when it did not run.
+         while (pos < length)
+         {
+             Debug.Assert((pText + pos) <= (pText + length));
+
+             byte current = pText[pos];
+             if (UnicodeUtility.IsAsciiCodePoint(current))
+             {
+                 if (!_allowedCharacters.IsUnicodeScalarAllowed(current))
+                 {
+                     return pos;
+                 }
+
+                 pos++;
+                 continue;
+             }
+
+             // Not an ASCII byte: decode the scalar value that starts at this position.
+             OperationStatus status = UnicodeHelpers.DecodeScalarValueFromUtf8(utf8Text.Slice(pos), out uint scalar, out int consumed);
+             Debug.Assert(scalar <= int.MaxValue);
+
+             if (status != OperationStatus.Done || WillEncode((int)scalar))
+             {
+                 return pos;
+             }
+
+             Debug.Assert(status == OperationStatus.Done);
+             pos += consumed;
+         }
+
+         return -1; // All bytes are allowed.
+     }
}
// The worst case encoding is 6 output chars per input char: [input] U+FFFF -> [output] "\uFFFF"
@@ -75,7 +270,7 @@ public unsafe override int FindFirstCharacterToEncode(char* text, int textLength
// See ECMA-262, Sec. 7.8.4, and ECMA-404, Sec. 9
// http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.4
// http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
- public unsafe override bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+ public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
{
if (buffer == null)
{
@@ -122,66 +317,9 @@ public unsafe override bool TryEncodeUnicodeScalar(int unicodeScalar, char* buff
toCopy = s_back;
break;
default:
- return TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
+ return JavaScriptEncoderHelper.TryWriteEncodedScalarAsNumericEntity(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);
}
return TryCopyCharacters(toCopy, buffer, bufferLength, out numberOfCharactersWritten);
}
-
- private static unsafe bool TryWriteEncodedScalarAsNumericEntity(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
- {
- Debug.Assert(buffer != null && length >= 0);
-
- if (UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar))
- {
- // Convert this back to UTF-16 and write out both characters.
- UnicodeHelpers.GetUtf16SurrogatePairFromAstralScalarValue(unicodeScalar, out char leadingSurrogate, out char trailingSurrogate);
- if (TryWriteEncodedSingleCharacter(leadingSurrogate, buffer, length, out int leadingSurrogateCharactersWritten) &&
- TryWriteEncodedSingleCharacter(trailingSurrogate, buffer + leadingSurrogateCharactersWritten, length - leadingSurrogateCharactersWritten, out numberOfCharactersWritten)
- )
- {
- numberOfCharactersWritten += leadingSurrogateCharactersWritten;
- return true;
- }
- else
- {
- numberOfCharactersWritten = 0;
- return false;
- }
- }
- else
- {
- // This is only a single character.
- return TryWriteEncodedSingleCharacter(unicodeScalar, buffer, length, out numberOfCharactersWritten);
- }
- }
-
- // Writes an encoded scalar value (in the BMP) as a JavaScript-escaped character.
- private static unsafe bool TryWriteEncodedSingleCharacter(int unicodeScalar, char* buffer, int length, out int numberOfCharactersWritten)
- {
- Debug.Assert(buffer != null && length >= 0);
- Debug.Assert(!UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar), "The incoming value should've been in the BMP.");
-
- if (length < 6)
- {
- numberOfCharactersWritten = 0;
- return false;
- }
-
- // Encode this as 6 chars "\uFFFF".
- *buffer = '\\';
- buffer++;
- *buffer = 'u';
- buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit(unicodeScalar >> 12);
- buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)((unicodeScalar >> 8) & 0xFU));
- buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)((unicodeScalar >> 4) & 0xFU));
- buffer++;
- *buffer = HexUtil.Int32LsbToHexDigit((int)(unicodeScalar & 0xFU));
-
- numberOfCharactersWritten = 6;
- return true;
- }
}
}
diff --git a/src/System.Text.Encodings.Web/src/System/Text/Internal/AllowedCharactersBitmap.cs b/src/System.Text.Encodings.Web/src/System/Text/Internal/AllowedCharactersBitmap.cs
index cb752838a190..75c6d8ffc6f1 100644
--- a/src/System.Text.Encodings.Web/src/System/Text/Internal/AllowedCharactersBitmap.cs
+++ b/src/System.Text.Encodings.Web/src/System/Text/Internal/AllowedCharactersBitmap.cs
@@ -73,30 +73,64 @@ public AllowedCharactersBitmap Clone()
// Determines whether the given character can be returned unencoded.
public bool IsCharacterAllowed(char character)
{
- int codePoint = character;
- int index = codePoint >> 5;
- int offset = codePoint & 0x1F;
- return ((_allowedCharacters[index] >> offset) & 0x1U) != 0;
+ return IsUnicodeScalarAllowed(character); // The char widens to int, so both methods share one bitmap lookup.
}
// Determines whether the given character can be returned unencoded.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool IsUnicodeScalarAllowed(int unicodeScalar)
{
+ Debug.Assert(unicodeScalar < 0x10000); // Callers must pass a BMP value; presumably the bitmap has no entries beyond it (confirm).
int index = unicodeScalar >> 5;
int offset = unicodeScalar & 0x1F;
- return ((_allowedCharacters[index] >> offset) & 0x1U) != 0;
+ return (_allowedCharacters[index] & (0x1U << offset)) != 0; // Tests bit 'offset' of the word selected by 'index'.
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe int FindFirstCharacterToEncode(char* text, int textLength)
{
- for (int i = 0; i < textLength; i++)
+ int i = 0; // Index of the char being examined; doubles as the return value.
+
+ while (i <= textLength - 8) // Unrolled: 8 chars per pass while at least 8 remain.
+ {
+ if (!IsCharacterAllowed(text[i]) // Short-circuiting leaves i on the first disallowed char.
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i]))
+ {
+ goto Return;
+ }
+ i++; // Step past the 8th char, which the chain above left i pointing at.
+ }
+
+ while (i <= textLength - 4) // Then 4 chars per pass while at least 4 remain.
+ {
+ if (!IsCharacterAllowed(text[i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i])
+ || !IsCharacterAllowed(text[++i]))
+ {
+ goto Return;
+ }
+ i++; // Step past the 4th char.
+ }
+
+ while (i < textLength) // Finally the remaining chars, one at a time.
{
if (!IsCharacterAllowed(text[i]))
- { return i; }
+ {
+ goto Return;
+ }
+ i++;
}
- return -1;
+
+ i = -1; // Every char is allowed.
+
+ Return:
+ return i;
}
}
}
diff --git a/src/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs b/src/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
index c2b00605608b..c66044f04717 100644
--- a/src/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
+++ b/src/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
@@ -4,10 +4,15 @@
using System;
using System.Buffers;
+using System.Collections.Generic;
+using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
using System.Text.Encodings.Web;
+using System.Text.Internal;
using System.Text.Unicode;
using Xunit;
@@ -15,6 +20,466 @@ namespace Microsoft.Framework.WebEncoders
{
public partial class JavaScriptStringEncoderTests
{
+ [Fact]
+ public unsafe void NullPtrThrows()
+ {
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Default.FindFirstCharacterToEncode(null, 0)); // Null text is rejected even with length 0.
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.UnsafeRelaxedJsonEscaping.FindFirstCharacterToEncode(null, 0));
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Create(UnicodeRanges.All).FindFirstCharacterToEncode(null, 0));
+     // A null output buffer is rejected as well.
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Default.TryEncodeUnicodeScalar('a', null, 0, out _));
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.UnsafeRelaxedJsonEscaping.TryEncodeUnicodeScalar('a', null, 0, out _));
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Create(UnicodeRanges.All).TryEncodeUnicodeScalar('a', null, 0, out _));
+     // The factory methods reject null settings and null ranges.
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Create((TextEncoderSettings)null));
+     Assert.Throws<ArgumentNullException>(() => JavaScriptEncoder.Create((UnicodeRange)null));
+ }
+
+ [Theory]
+ [MemberData(nameof(EscapingTestData))]
+ public unsafe void FindFirstCharacterToEncode(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+ {
+     // Empty input never needs escaping, through either entry point.
+     Assert.Equal(-1, encoder.FindFirstCharacterToEncodeUtf8(default));
+     fixed (char* pEmpty = string.Empty)
+     {
+         Assert.Equal(-1, encoder.FindFirstCharacterToEncode(pEmpty, 0));
+     }
+
+     // Lengths 0..49 are exercised with a fixed seed so the generated text is reproducible.
+     var rng = new Random(42);
+     for (int length = 0; length < 50; length++)
+     {
+         // Baseline: random lowercase ASCII letters, for which -1 is expected.
+         var builder = new StringBuilder(length);
+         while (builder.Length < length)
+         {
+             builder.Append((char)rng.Next('a', 'z' + 1));
+         }
+         string baseline = builder.ToString();
+
+         Assert.Equal(-1, encoder.FindFirstCharacterToEncodeUtf8(Encoding.UTF8.GetBytes(baseline)));
+         fixed (char* pBaseline = baseline)
+         {
+             Assert.Equal(-1, encoder.FindFirstCharacterToEncode(pBaseline, baseline.Length));
+         }
+
+         // Substitute the probe character at each position in turn and expect that position back.
+         for (int position = 0; position < length; position++)
+         {
+             int expected = requiresEscaping ? position : -1;
+             string probe = new StringBuilder(baseline) { [position] = replacementChar }.ToString();
+
+             Assert.Equal(expected, encoder.FindFirstCharacterToEncodeUtf8(Encoding.UTF8.GetBytes(probe)));
+             fixed (char* pProbe = probe)
+             {
+                 Assert.Equal(expected, encoder.FindFirstCharacterToEncode(pProbe, probe.Length));
+             }
+         }
+
+         // Finally, a string made only of the probe character (skipped for the empty length).
+         if (length > 0)
+         {
+             int expected = requiresEscaping ? 0 : -1;
+             string filled = new string(replacementChar, length);
+
+             Assert.Equal(expected, encoder.FindFirstCharacterToEncodeUtf8(Encoding.UTF8.GetBytes(filled)));
+             fixed (char* pFilled = filled)
+             {
+                 Assert.Equal(expected, encoder.FindFirstCharacterToEncode(pFilled, filled.Length));
+             }
+         }
+     }
+ }
+
+ public static IEnumerable