Skip to content

Commit ceef11f

Browse files
[release/7.0-rc1] Optimized string.Replace(char, char) (#74047)
* Optimized string.Replace(char, char) vector code path
* Optimized code paths even further
* Do vectorized operation at the end of the string only once. When the remaining length is a multiple of the vector size, then the remainder is processed twice. This is redundant, and not needed. This commit changes that, so that the remainder is processed only once when the remaining elements match.
* Don't use trick for collapsed epilogs. Cf. #67049 (comment)
* Handle remainder vectorized even if remainingLength <= Vector<ushort>.Count and added tests for this
* Introduce (internal) Vector.LoadUnsafe and Vector.StoreUnsafe and use them in string.Replace(char, char)
* Avoid Unsafe.As<char, ushort> reinterpret casts by introducing string.GetRawStringDataAsUshort() internal method
* Fixed copy/paste error (from local dev to repo)
* PR Feedback
* Fixed bug and added tests for this
* Make condition about lengthToExamine clearer as suggested

Co-authored-by: Günther Foidl <[email protected]>
1 parent 8baff7d commit ceef11f

File tree

4 files changed

+71
-25
lines changed

4 files changed

+71
-25
lines changed

src/libraries/Common/tests/Tests/System/StringTests.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,14 +4697,22 @@ public static void Remove_Invalid()
46974697
[InlineData("Aaaaaaaa", 'A', 'a', "aaaaaaaa")] // Single iteration of vectorised path; no remainders through non-vectorised path
46984698
// Three leading 'a's before a match (copyLength > 0), Single iteration of vectorised path; no remainders through non-vectorised path
46994699
[InlineData("aaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaa")]
4700-
// Single iteration of vectorised path; 3 remainders through non-vectorised path
4700+
// Single iteration of vectorized path; 3 remainders handled by vectorized path
47014701
[InlineData("AaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaa")]
4702+
// Single iteration of vectorized path; 0 remainders handled by vectorized path
4703+
[InlineData("aaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaa")]
4704+
// Eight chars before a match (copyLength > 0), single iteration of vectorized path for the remainder
4705+
[InlineData("12345678AAAAAAA", 'A', 'a', "12345678aaaaaaa")]
47024706
// ------------------------- For Vector<ushort>.Count == 16 (AVX2) -------------------------
47034707
[InlineData("AaaaaaaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaaaaaaa")] // Single iteration of vectorised path; no remainders through non-vectorised path
47044708
// Three leading 'a's before a match (copyLength > 0), Single iteration of vectorised path; no remainders through non-vectorised path
47054709
[InlineData("aaaAaaaaaaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")]
4706-
// Single iteration of vectorised path; 3 remainders through non-vectorised path
4710+
// Single iteration of vectorized path; 3 remainders handled by vectorized path
47074711
[InlineData("AaaaaaaaAaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")]
4712+
// Single iteration of vectorized path; 0 remainders handled by vectorized path
4713+
[InlineData("aaaaaaaaaaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")]
4714+
// Sixteen chars before a match (copyLength > 0), single iteration of vectorized path for the remainder
4715+
[InlineData("1234567890123456AAAAAAAAAAAAAAA", 'A', 'a', "1234567890123456aaaaaaaaaaaaaaa")]
47084716
// ----------------------------------- General test data -----------------------------------
47094717
[InlineData("Hello", 'l', '!', "He!!o")] // 2 match, non-vectorised path
47104718
[InlineData("Hello", 'e', 'e', "Hello")] // oldChar and newChar are same; nothing to replace

src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,14 @@ public static bool LessThanOrEqualAll<T>(Vector<T> left, Vector<T> right)
895895
public static bool LessThanOrEqualAny<T>(Vector<T> left, Vector<T> right)
896896
where T : struct => LessThanOrEqual(left, right).As<T, nuint>() != Vector<nuint>.Zero;
897897

898+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
899+
internal static Vector<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
900+
where T : struct
901+
{
902+
source = ref Unsafe.Add(ref source, elementOffset);
903+
return Unsafe.ReadUnaligned<Vector<T>>(ref Unsafe.As<T, byte>(ref source));
904+
}
905+
898906
/// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
899907
/// <param name="left">The vector to compare with <paramref name="right" />.</param>
900908
/// <param name="right">The vector to compare with <paramref name="left" />.</param>
@@ -1658,6 +1666,14 @@ public static Vector<T> SquareRoot<T>(Vector<T> value)
16581666
return result;
16591667
}
16601668

1669+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1670+
internal static void StoreUnsafe<T>(this Vector<T> source, ref T destination, nuint elementOffset)
1671+
where T : struct
1672+
{
1673+
destination = ref Unsafe.Add(ref destination, elementOffset);
1674+
Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination), source);
1675+
}
1676+
16611677
/// <summary>Subtracts two vectors to compute their difference.</summary>
16621678
/// <param name="left">The vector from which <paramref name="right" /> will be subtracted.</param>
16631679
/// <param name="right">The vector to subtract from <paramref name="left" />.</param>

src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,7 @@ public string Replace(char oldChar, char newChar)
994994
if (firstIndex < 0)
995995
return this;
996996

997-
int remainingLength = Length - firstIndex;
997+
nuint remainingLength = (uint)(Length - firstIndex);
998998
string result = FastAllocateString(Length);
999999

10001000
int copyLength = firstIndex;
@@ -1006,35 +1006,56 @@ public string Replace(char oldChar, char newChar)
10061006
}
10071007

10081008
// Copy the remaining characters, doing the replacement as we go.
1009-
ref ushort pSrc = ref Unsafe.Add(ref Unsafe.As<char, ushort>(ref _firstChar), copyLength);
1010-
ref ushort pDst = ref Unsafe.Add(ref Unsafe.As<char, ushort>(ref result._firstChar), copyLength);
1009+
ref ushort pSrc = ref Unsafe.Add(ref GetRawStringDataAsUInt16(), (uint)copyLength);
1010+
ref ushort pDst = ref Unsafe.Add(ref result.GetRawStringDataAsUInt16(), (uint)copyLength);
1011+
nuint i = 0;
10111012

1012-
if (Vector.IsHardwareAccelerated && remainingLength >= Vector<ushort>.Count)
1013+
if (Vector.IsHardwareAccelerated && Length >= Vector<ushort>.Count)
10131014
{
1014-
Vector<ushort> oldChars = new Vector<ushort>(oldChar);
1015-
Vector<ushort> newChars = new Vector<ushort>(newChar);
1015+
Vector<ushort> oldChars = new(oldChar);
1016+
Vector<ushort> newChars = new(newChar);
10161017

1017-
do
1018+
Vector<ushort> original;
1019+
Vector<ushort> equals;
1020+
Vector<ushort> results;
1021+
1022+
if (remainingLength > (nuint)Vector<ushort>.Count)
10181023
{
1019-
Vector<ushort> original = Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<ushort, byte>(ref pSrc));
1020-
Vector<ushort> equals = Vector.Equals(original, oldChars);
1021-
Vector<ushort> results = Vector.ConditionalSelect(equals, newChars, original);
1022-
Unsafe.WriteUnaligned(ref Unsafe.As<ushort, byte>(ref pDst), results);
1023-
1024-
pSrc = ref Unsafe.Add(ref pSrc, Vector<ushort>.Count);
1025-
pDst = ref Unsafe.Add(ref pDst, Vector<ushort>.Count);
1026-
remainingLength -= Vector<ushort>.Count;
1024+
nuint lengthToExamine = remainingLength - (nuint)Vector<ushort>.Count;
1025+
1026+
do
1027+
{
1028+
original = Vector.LoadUnsafe(ref pSrc, i);
1029+
equals = Vector.Equals(original, oldChars);
1030+
results = Vector.ConditionalSelect(equals, newChars, original);
1031+
results.StoreUnsafe(ref pDst, i);
1032+
1033+
i += (nuint)Vector<ushort>.Count;
1034+
}
1035+
while (i < lengthToExamine);
10271036
}
1028-
while (remainingLength >= Vector<ushort>.Count);
1029-
}
10301037

1031-
for (; remainingLength > 0; remainingLength--)
1032-
{
1033-
ushort currentChar = pSrc;
1034-
pDst = currentChar == oldChar ? newChar : currentChar;
1038+
// There are [0, Vector<ushort>.Count) elements remaining now.
1039+
// As the operation is idempotent, and we know that in total there are at least Vector<ushort>.Count
1040+
// elements available, we read a vector from the very end of the string, perform the replace
1041+
// and write to the destination at the very end.
1042+
// Thus we can eliminate the scalar processing of the remaining elements.
1043+
// We perform this operation even if there are 0 elements remaining, as it is cheaper than the
1044+
// additional check which would introduce a branch here.
10351045

1036-
pSrc = ref Unsafe.Add(ref pSrc, 1);
1037-
pDst = ref Unsafe.Add(ref pDst, 1);
1046+
i = (uint)(Length - Vector<ushort>.Count);
1047+
original = Vector.LoadUnsafe(ref GetRawStringDataAsUInt16(), i);
1048+
equals = Vector.Equals(original, oldChars);
1049+
results = Vector.ConditionalSelect(equals, newChars, original);
1050+
results.StoreUnsafe(ref result.GetRawStringDataAsUInt16(), i);
1051+
}
1052+
else
1053+
{
1054+
for (; i < remainingLength; ++i)
1055+
{
1056+
ushort currentChar = Unsafe.Add(ref pSrc, i);
1057+
Unsafe.Add(ref pDst, i) = currentChar == oldChar ? newChar : currentChar;
1058+
}
10381059
}
10391060

10401061
return result;

src/libraries/System.Private.CoreLib/src/System/String.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,7 @@ public static bool IsNullOrWhiteSpace([NotNullWhen(false)] string? value)
508508
public ref readonly char GetPinnableReference() => ref _firstChar;
509509

510510
internal ref char GetRawStringData() => ref _firstChar;
511+
internal ref ushort GetRawStringDataAsUInt16() => ref Unsafe.As<char, ushort>(ref _firstChar);
511512

512513
// Helper for encodings so they can talk to our buffer directly
513514
// stringLength must be the exact size we'll expect

0 commit comments

Comments
 (0)