diff --git a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs index 2bb171a9826dbc..6e9c7a3451c25a 100644 --- a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs @@ -232,7 +232,11 @@ void TestWithPoisonPages(PoisonPagePlacement poisonPlacement, int haystackLength .First(c => !values.AsSpan().ContainsAny(c, char.ToLowerInvariant(c))); } - TestWithDifferentMarkerChars(haystack, '\0'); + if (!values.Contains('\0')) + { + TestWithDifferentMarkerChars(haystack, '\0'); + } + TestWithDifferentMarkerChars(haystack, '\u00FC'); TestWithDifferentMarkerChars(haystack, asciiNumberNotInSet); TestWithDifferentMarkerChars(haystack, asciiLetterLowerNotInSet); @@ -407,10 +411,26 @@ public static void SimpleIndexOfAnyValues(params string[] valuesArray) valuesArray[offset] = $"{original[0]}\u00F6{original.AsSpan(1)}"; TestCore(valuesArray); + // Test non-ASCII values over 0xFF + valuesArray[offset] = $"{original}\u2049"; + TestCore(valuesArray); + + valuesArray[offset] = $"\u2049{original}"; + TestCore(valuesArray); + + valuesArray[offset] = $"{original[0]}\u2049{original.AsSpan(1)}"; + TestCore(valuesArray); + // Test null chars in values valuesArray[offset] = $"{original[0]}\0{original.AsSpan(1)}"; TestCore(valuesArray); + valuesArray[offset] = $"\0{original}"; + TestCore(valuesArray); + + valuesArray[offset] = $"{original}\0"; + TestCore(valuesArray); + static void TestCore(string[] valuesArray) { Values_ImplementsSearchValuesBase(StringComparison.Ordinal, valuesArray); @@ -529,7 +549,7 @@ public static void TestIndexOfAny_RandomInputs_Stress() if (RemoteExecutor.IsSupported && Avx512F.IsSupported) { var psi = new ProcessStartInfo(); - psi.Environment.Add("DOTNET_EnableAVX512F", "0"); + psi.Environment.Add("DOTNET_EnableAVX512", "0"); RemoteExecutor.Invoke(RunStress, new RemoteInvokeOptions { StartInfo = psi, TimeOut = 10 * 60 * 
1000 }).Dispose(); } diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 1c2303c097a6aa..478b3fe245aa89 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -471,6 +471,7 @@ + @@ -2462,10 +2463,8 @@ Common\Interop\Unix\System.Native\Interop.MountPoints.cs - - + + Common\Interop\Unix\System.Native\Interop.Open.cs @@ -2879,4 +2878,4 @@ - + \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs index 1a0c3d80863aec..b764e3a22d81ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs @@ -91,7 +91,7 @@ namespace System.Buffers // // For an alternative description of the algorithm, see // https://github.com/BurntSushi/aho-corasick/blob/8d735471fc12f0ca570cead8e17342274fae6331/src/packed/teddy/README.md - // Has an O(i * m) worst-case, with the expected time closer to O(n) for good bucket distributions. + // Has an O(i * m) worst-case, with the expected time closer to O(i) for good bucket distributions. 
internal abstract class AsciiStringSearchValuesTeddyBase : StringSearchValuesRabinKarp where TBucketized : struct, SearchValues.IRuntimeConst where TStartCaseSensitivity : struct, ICaseSensitivity // Refers to the characters being matched by Teddy diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/StringSearchValuesHelper.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/StringSearchValuesHelper.cs index 15a608cce70db6..ff420480438c5e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/StringSearchValuesHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/StringSearchValuesHelper.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Text; namespace System.Buffers @@ -270,12 +271,31 @@ public static bool Equals(ref char matchStart, ref readonly Single else { Debug.Assert(state.Value.Length is 2 or 3); - Debug.Assert(matchStart == state.Value[0], "This should only be called after the first character has been checked"); - // We know that the candidate is 2 or 3 characters long, and that the first character has already been checked. - // We only have to to check whether the last 2 characters also match. ref byte matchByteStart = ref Unsafe.As(ref matchStart); - return Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) == state.Value32_1; + + if (AdvSimd.IsSupported) + { + // See comments on SingleStringSearchValuesPackedThreeChars.CanSkipAnchorMatchVerification. + // When running on Arm64, this helper is also used to confirm vectorized anchor matches. + // We do so because we're using UnzipEven when packing inputs, which may produce false positive anchor matches. 
+ // When called from SingleStringSearchValuesThreeChars (non-packed), we could skip to the else branch instead. + Debug.Assert(matchStart == state.Value[0] || (matchStart & 0xFF) == state.Value[0]); + + uint differentBits = Unsafe.ReadUnaligned(ref matchByteStart) - state.Value32_0; + differentBits |= Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) - state.Value32_1; + return differentBits == 0; + } + else + { + // Otherwise, this path is not used when confirming vectorized anchor matches. + // It's only used as part of the scalar search loop, which always checks that the first character matches before calling this helper. + // We know that the candidate is 2 or 3 characters long, and that the first character has already been checked. + // We only have to check whether the last 2 characters also match. + Debug.Assert(matchStart == state.Value[0], "This should only be called after the first character has been checked"); + + return Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) == state.Value32_1; + } } } } @@ -319,13 +339,32 @@ public static bool Equals(ref char matchStart, ref readonly Single else { Debug.Assert(state.Value.Length is 2 or 3); - Debug.Assert(TransformInput(matchStart) == state.Value[0], "This should only be called after the first character has been checked"); - // We know that the candidate is 2 or 3 characters long, and that the first character has already been checked. - // We only have to to check whether the last 2 characters also match. const uint CaseMask = ~0x200020u; ref byte matchByteStart = ref Unsafe.As(ref matchStart); - return (Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) == state.Value32_1; + + if (AdvSimd.IsSupported) + { + // See comments on SingleStringSearchValuesPackedThreeChars.CanSkipAnchorMatchVerification. + // When running on Arm64, this helper is also used to confirm vectorized anchor matches.
+ // We do so because we're using UnzipEven when packing inputs, which may produce false positive anchor matches. + // When called from SingleStringSearchValuesThreeChars (non-packed), we could skip to the else branch instead. + Debug.Assert(TransformInput((char)(matchStart & 0xFF)) == state.Value[0]); + + uint differentBits = (Unsafe.ReadUnaligned(ref matchByteStart) & CaseMask) - state.Value32_0; + differentBits |= (Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) - state.Value32_1; + return differentBits == 0; + } + else + { + // Otherwise, this path is not used when confirming vectorized anchor matches. + // It's only used as part of the scalar search loop, which always checks that the first character matches before calling this helper. + // We know that the candidate is 2 or 3 characters long, and that the first character has already been checked. + // We only have to check whether the last 2 characters also match. + Debug.Assert(TransformInput(matchStart) == state.Value[0], "This should only be called after the first character has been checked"); + + return (Unsafe.ReadUnaligned(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) == state.Value32_1; + } } } } @@ -392,7 +431,6 @@ public static bool Equals(ref char matchStart, ref readonly Single else { Debug.Assert(state.Value.Length is 2 or 3); - Debug.Assert((matchStart & ~0x20) == (state.Value[0] & ~0x20)); ref byte matchByteStart = ref Unsafe.As(ref matchStart); uint differentBits = (Unsafe.ReadUnaligned(ref matchByteStart) & state.ToUpperMask32_0) - state.Value32_0; diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesPackedThreeChars.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesPackedThreeChars.cs new file mode 100644 index 00000000000000..5c1d64bf9b0dfb --- /dev/null +++
b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesPackedThreeChars.cs @@ -0,0 +1,417 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +using static System.Buffers.StringSearchValuesHelper; + +namespace System.Buffers +{ + /// <summary> + /// Same as <see cref="SingleStringSearchValuesThreeChars{TValueLength, TCaseSensitivity}"/>, but using packed comparisons similar to <see cref="PackedSpanHelpers"/>. + /// </summary> + internal sealed class SingleStringSearchValuesPackedThreeChars : StringSearchValuesBase + where TValueLength : struct, IValueLength + where TCaseSensitivity : struct, ICaseSensitivity + { + private const byte CaseConversionMask = unchecked((byte)~0x20); + + private readonly SingleValueState _valueState; + private readonly nint _minusValueTailLength; + private readonly nuint _ch2ByteOffset; + private readonly nuint _ch3ByteOffset; + private readonly byte _ch1; + private readonly byte _ch2; + private readonly byte _ch3; + + private static bool IgnoreCase => typeof(TCaseSensitivity) != typeof(CaseSensitive); + + // If the value is short (ValueLengthLessThan4 => 2 or 3 characters), the anchors already represent the whole value. + // With case-sensitive comparisons, we've therefore already confirmed the match, so we can skip doing so here. + // With case-insensitive comparisons, we applied a mask to the input, so while the anchors likely matched, we can't be sure. + // If the value is composed of only ASCII letters, masking the input can't produce false positives, so we can also skip the verification step. + // We only do this when running on X86 and not ARM64, as the latter uses UnzipEven when packing inputs, which may produce false positive anchor matches.
+ // We use that instead of ExtractNarrowingSaturate because it allows for higher searching throughput. + private static bool CanSkipAnchorMatchVerification + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => + Sse2.IsSupported && + typeof(TValueLength) == typeof(ValueLengthLessThan4) && + (typeof(TCaseSensitivity) == typeof(CaseSensitive) || typeof(TCaseSensitivity) == typeof(CaseInsensitiveAsciiLetters)); + } + + public SingleStringSearchValuesPackedThreeChars(HashSet? uniqueValues, string value, int ch2Offset, int ch3Offset) : base(uniqueValues) + { + Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported); + + // We could have more than one entry in 'uniqueValues' if this value is an exact prefix of all the others. + Debug.Assert(value.Length > 1); + Debug.Assert(ch3Offset == 0 || ch3Offset > ch2Offset); + Debug.Assert(value[0] <= byte.MaxValue && value[ch2Offset] <= byte.MaxValue && value[ch3Offset] <= byte.MaxValue); + + _valueState = new SingleValueState(value, IgnoreCase); + _minusValueTailLength = -(value.Length - 1); + + _ch1 = (byte)value[0]; + _ch2 = (byte)value[ch2Offset]; + _ch3 = (byte)value[ch3Offset]; + + if (IgnoreCase) + { + _ch1 &= CaseConversionMask; + _ch2 &= CaseConversionMask; + _ch3 &= CaseConversionMask; + } + + _ch2ByteOffset = (nuint)ch2Offset * 2; + _ch3ByteOffset = (nuint)ch3Offset * 2; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAnyMultiString(ReadOnlySpan span) => + IndexOf(ref MemoryMarshal.GetReference(span), span.Length); + + private int IndexOf(ref char searchSpace, int searchSpaceLength) + { + ref char searchSpaceStart = ref searchSpace; + + nint searchSpaceMinusValueTailLength = searchSpaceLength + _minusValueTailLength; + + nuint ch2ByteOffset = _ch2ByteOffset; + nuint ch3ByteOffset = _ch3ByteOffset; + + if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceMinusValueTailLength - Vector512.Count >= 0) + { + Vector512 ch1 = 
Vector512.Create(_ch1); + Vector512 ch2 = Vector512.Create(_ch2); + Vector512 ch3 = Vector512.Create(_ch3); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector512.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch3ByteOffset / sizeof(char))); + + // Find which starting positions likely contain a match (likely match all 3 anchor characters). + Vector512 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); + + if (result != Vector512.Zero) + { + goto CandidateFound; + } + + LoopFooter: + // We haven't found a match. Update the input position and check if we've reached the end. + searchSpace = ref Unsafe.Add(ref searchSpace, Vector512.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector512.Count))) + { + return -1; + } + + // We have fewer than 64 characters remaining. Adjust the input position such that we will do one last loop iteration. + searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound: + // We found potential matches, but they may be false-positives, so we must verify each one. 
+ if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter; + } + } + else if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) + { + Vector256 ch1 = Vector256.Create(_ch1); + Vector256 ch2 = Vector256.Create(_ch2); + Vector256 ch3 = Vector256.Create(_ch3); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector256.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch3ByteOffset / sizeof(char))); + + // Find which starting positions likely contain a match (likely match all 3 anchor characters). + Vector256 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); + + if (result != Vector256.Zero) + { + goto CandidateFound; + } + + LoopFooter: + searchSpace = ref Unsafe.Add(ref searchSpace, Vector256.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector256.Count))) + { + return -1; + } + + // We have fewer than 32 characters remaining. Adjust the input position such that we will do one last loop iteration. + searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound: + // We found potential matches, but they may be false-positives, so we must verify each one. 
+ if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, PackedSpanHelpers.FixUpPackedVector256Result(result).ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter; + } + } + else if ((Sse2.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceMinusValueTailLength - Vector128.Count >= 0) + { + Vector128 ch1 = Vector128.Create(_ch1); + Vector128 ch2 = Vector128.Create(_ch2); + Vector128 ch3 = Vector128.Create(_ch3); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector128.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch3ByteOffset / sizeof(char))); + + // Find which starting positions likely contain a match (likely match all 3 anchor characters). + Vector128 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); + + if (result != Vector128.Zero) + { + goto CandidateFound; + } + + LoopFooter: + searchSpace = ref Unsafe.Add(ref searchSpace, Vector128.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector128.Count))) + { + return -1; + } + + // We have fewer than 16 characters remaining. Adjust the input position such that we will do one last loop iteration. + searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound: + // We found potential matches, but they may be false-positives, so we must verify each one. 
+ if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, result.ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter; + } + } + + char valueHead = _valueState.Value.GetRawStringData(); + + for (nint i = 0; i < searchSpaceMinusValueTailLength; i++) + { + ref char cur = ref Unsafe.Add(ref searchSpace, i); + + // CaseInsensitiveUnicode doesn't support single-character transformations, so we skip checking the first character first. + if ((typeof(TCaseSensitivity) == typeof(CaseInsensitiveUnicode) || TCaseSensitivity.TransformInput(cur) == valueHead) && + TCaseSensitivity.Equals(ref cur, in _valueState)) + { + return (int)i; + } + } + + return -1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + private static Vector128 GetComparisonResult(ref char searchSpace, nuint ch2ByteOffset, nuint ch3ByteOffset, Vector128 ch1, Vector128 ch2, Vector128 ch3) + { + // Load 3 vectors from the input. + // One from the current search space, the other two at an offset based on the distance of those characters from the first one. + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadPacked128(ref searchSpace, 0)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadPacked128(ref searchSpace, ch2ByteOffset)); + Vector128 cmpCh3 = Vector128.Equals(ch3, LoadPacked128(ref searchSpace, ch3ByteOffset)); + // AND all 3 together to get a mask of possible match positions that match in at least 3 places. + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + else + { + // For each, AND the value with ~0x20 so that letters are uppercased. + // For characters that aren't ASCII letters, this may produce wrong results, but only false-positives. + // We will take care of those in the verification step if the other characters also indicate a possible match. 
+ Vector128 caseConversion = Vector128.Create(CaseConversionMask); + + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadPacked128(ref searchSpace, 0) & caseConversion); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadPacked128(ref searchSpace, ch2ByteOffset) & caseConversion); + Vector128 cmpCh3 = Vector128.Equals(ch3, LoadPacked128(ref searchSpace, ch3ByteOffset) & caseConversion); + // AND all 3 together to get a mask of possible match positions that likely match in at least 3 places. + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 GetComparisonResult(ref char searchSpace, nuint ch2ByteOffset, nuint ch3ByteOffset, Vector256 ch1, Vector256 ch2, Vector256 ch3) + { + // See comments in 'GetComparisonResult' for Vector128 above. + // This method is the same, but operates on 32 input characters at a time. + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadPacked256(ref searchSpace, 0)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadPacked256(ref searchSpace, ch2ByteOffset)); + Vector256 cmpCh3 = Vector256.Equals(ch3, LoadPacked256(ref searchSpace, ch3ByteOffset)); + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + else + { + Vector256 caseConversion = Vector256.Create(CaseConversionMask); + + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadPacked256(ref searchSpace, 0) & caseConversion); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadPacked256(ref searchSpace, ch2ByteOffset) & caseConversion); + Vector256 cmpCh3 = Vector256.Equals(ch3, LoadPacked256(ref searchSpace, ch3ByteOffset) & caseConversion); + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx512BW))] + private static Vector512 GetComparisonResult(ref char searchSpace, nuint ch2ByteOffset, nuint ch3ByteOffset, Vector512 ch1, Vector512 ch2, 
Vector512 ch3) + { + // See comments in 'GetComparisonResult' for Vector128 above. + // This method is the same, but operates on 64 input characters at a time. + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + Vector512 cmpCh1 = Vector512.Equals(ch1, LoadPacked512(ref searchSpace, 0)); + Vector512 cmpCh2 = Vector512.Equals(ch2, LoadPacked512(ref searchSpace, ch2ByteOffset)); + Vector512 cmpCh3 = Vector512.Equals(ch3, LoadPacked512(ref searchSpace, ch3ByteOffset)); + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + else + { + Vector512 caseConversion = Vector512.Create(CaseConversionMask); + + Vector512 cmpCh1 = Vector512.Equals(ch1, LoadPacked512(ref searchSpace, 0) & caseConversion); + Vector512 cmpCh2 = Vector512.Equals(ch2, LoadPacked512(ref searchSpace, ch2ByteOffset) & caseConversion); + Vector512 cmpCh3 = Vector512.Equals(ch3, LoadPacked512(ref searchSpace, ch3ByteOffset) & caseConversion); + return (cmpCh1 & cmpCh2 & cmpCh3).AsByte(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char searchSpace, uint mask, out int offsetFromStart) + { + // 'mask' encodes the input positions where at least 3 characters likely matched. + // Verify each one to see if we've found a match, otherwise return back to the vectorized loop. 
+ do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + + ref char matchRef = ref Unsafe.Add(ref searchSpace, bitPos); + + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref matchRef, _valueState.Value.Length); + + if (CanSkipAnchorMatchVerification || TCaseSensitivity.Equals(ref matchRef, in _valueState)) + { + offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + return true; + } + + mask = BitOperations.ResetLowestSetBit(mask); + } + while (mask != 0); + + offsetFromStart = 0; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char searchSpace, ulong mask, out int offsetFromStart) + { + // 'mask' encodes the input positions where at least 3 characters likely matched. + // Verify each one to see if we've found a match, otherwise return back to the vectorized loop. + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + + ref char matchRef = ref Unsafe.Add(ref searchSpace, bitPos); + + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref matchRef, _valueState.Value.Length); + + if (CanSkipAnchorMatchVerification || TCaseSensitivity.Equals(ref matchRef, in _valueState)) + { + offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + return true; + } + + mask = BitOperations.ResetLowestSetBit(mask); + } + while (mask != 0); + + offsetFromStart = 0; + return false; + } + + internal override bool ContainsCore(string value) => HasUniqueValues + ? base.ContainsCore(value) + : _valueState.Value.Equals(value, IgnoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); + + internal override string[] GetValues() => HasUniqueValues + ? 
base.GetValues() + : [_valueState.Value]; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + private static Vector128 LoadPacked128(ref char searchSpace, nuint byteOffset) + { + Vector128 input0 = Vector128.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)); + Vector128 input1 = Vector128.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector128.Count)); + + return Sse2.IsSupported + ? Sse2.PackUnsignedSaturate(input0.AsInt16(), input1.AsInt16()) + : AdvSimd.Arm64.UnzipEven(input0.AsByte(), input1.AsByte()); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 LoadPacked256(ref char searchSpace, nuint byteOffset) => + Avx2.PackUnsignedSaturate( + Vector256.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)).AsInt16(), + Vector256.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector256.Count)).AsInt16()); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx512BW))] + private static Vector512 LoadPacked512(ref char searchSpace, nuint byteOffset) => + Avx512BW.PackUnsignedSaturate( + Vector512.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)).AsInt16(), + Vector512.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector512.Count)).AsInt16()); + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesThreeChars.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesThreeChars.cs index c005173b67e143..9640f9040b630e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesThreeChars.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/SingleStringSearchValuesThreeChars.cs @@ -43,13 
+43,10 @@ private static bool CanSkipAnchorMatchVerification (typeof(TCaseSensitivity) == typeof(CaseSensitive) || typeof(TCaseSensitivity) == typeof(CaseInsensitiveAsciiLetters)); } - public SingleStringSearchValuesThreeChars(HashSet? uniqueValues, string value) : base(uniqueValues) + public SingleStringSearchValuesThreeChars(HashSet? uniqueValues, string value, int ch2Offset, int ch3Offset) : base(uniqueValues) { // We could have more than one entry in 'uniqueValues' if this value is an exact prefix of all the others. Debug.Assert(value.Length > 1); - - CharacterFrequencyHelper.GetSingleStringMultiCharacterOffsets(value, IgnoreCase, out int ch2Offset, out int ch3Offset); - Debug.Assert(ch3Offset == 0 || ch3Offset > ch2Offset); _valueState = new SingleValueState(value, IgnoreCase); @@ -61,6 +58,8 @@ public SingleStringSearchValuesThreeChars(HashSet? uniqueValues, string if (IgnoreCase) { + Debug.Assert(char.IsAscii((char)_ch1) && char.IsAscii((char)_ch2) && char.IsAscii((char)_ch3)); + _ch1 &= CaseConversionMask; _ch2 &= CaseConversionMask; _ch3 &= CaseConversionMask; @@ -99,8 +98,8 @@ private int IndexOf(ref char searchSpace, int searchSpaceLength) while (true) { ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch2ByteOffset / 2)); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch3ByteOffset / 2)); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(_ch3ByteOffset / sizeof(char))); // Find which starting positions likely contain a match (likely match all 3 anchor characters). 
Vector512 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); @@ -147,8 +146,8 @@ private int IndexOf(ref char searchSpace, int searchSpaceLength) while (true) { ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch2ByteOffset / 2)); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch3ByteOffset / 2)); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(_ch3ByteOffset / sizeof(char))); // Find which starting positions likely contain a match (likely match all 3 anchor characters). Vector256 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); @@ -194,8 +193,8 @@ private int IndexOf(ref char searchSpace, int searchSpaceLength) while (true) { ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch2ByteOffset / 2)); - ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch3ByteOffset / 2)); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch2ByteOffset / sizeof(char))); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(_ch3ByteOffset / sizeof(char))); // Find which starting positions likely contain a match (likely match all 3 anchor characters). 
Vector128 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, ch3ByteOffset, ch1, ch2, ch3); @@ -281,7 +280,7 @@ private static Vector128 GetComparisonResult(ref char searchSpace, nuint c private static Vector256 GetComparisonResult(ref char searchSpace, nuint ch2ByteOffset, nuint ch3ByteOffset, Vector256 ch1, Vector256 ch2, Vector256 ch3) { // See comments in 'GetComparisonResult' for Vector128 above. - // This method is the same, but operates on 32 input characters at a time. + // This method is the same, but operates on 16 input characters at a time. if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) { Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace)); @@ -304,7 +303,7 @@ private static Vector256 GetComparisonResult(ref char searchSpace, nuint c private static Vector512 GetComparisonResult(ref char searchSpace, nuint ch2ByteOffset, nuint ch3ByteOffset, Vector512 ch1, Vector512 ch2, Vector512 ch3) { // See comments in 'GetComparisonResult' for Vector128 above. - // This method is the same, but operates on 64 input characters at a time. + // This method is the same, but operates on 32 input characters at a time. if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) { Vector512 cmpCh1 = Vector512.Equals(ch1, Vector512.LoadUnsafe(ref searchSpace)); @@ -339,7 +338,7 @@ private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char if (CanSkipAnchorMatchVerification || TCaseSensitivity.Equals(ref matchRef, in _valueState)) { - offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / 2); + offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); return true; } @@ -379,7 +378,6 @@ private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char return false; } - internal override bool ContainsCore(string value) => HasUniqueValues ? base.ContainsCore(value) : _valueState.Value.Equals(value, IgnoreCase ? 
StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs index 42ae98a2b440ce..05db4d2f96bf73 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs @@ -417,29 +417,51 @@ private static SearchValues CreateForSingleValue( { if (!ignoreCase) { - return new SingleStringSearchValuesThreeChars(uniqueValues, value); + return CreateSingleValuesThreeChars(value, uniqueValues); } if (asciiLettersOnly) { - return new SingleStringSearchValuesThreeChars(uniqueValues, value); + return CreateSingleValuesThreeChars(value, uniqueValues); } if (allAscii) { - return new SingleStringSearchValuesThreeChars(uniqueValues, value); + return CreateSingleValuesThreeChars(value, uniqueValues); } // SingleStringSearchValuesThreeChars doesn't have logic to handle non-ASCII case conversion, so we require that anchor characters are ASCII. // Right now we're always selecting the first character as one of the anchors, and we need at least two. if (char.IsAscii(value[0]) && value.AsSpan(1).ContainsAnyInRange((char)0, (char)127)) { - return new SingleStringSearchValuesThreeChars(uniqueValues, value); + return CreateSingleValuesThreeChars(value, uniqueValues); } return null; } + private static SearchValues CreateSingleValuesThreeChars( + string value, + HashSet? 
uniqueValues) + where TValueLength : struct, IValueLength + where TCaseSensitivity : struct, ICaseSensitivity + { + CharacterFrequencyHelper.GetSingleStringMultiCharacterOffsets(value, ignoreCase: typeof(TCaseSensitivity) != typeof(CaseSensitive), out int ch2Offset, out int ch3Offset); + + if (CanUsePackedImpl(value[0]) && CanUsePackedImpl(value[ch2Offset]) && CanUsePackedImpl(value[ch3Offset])) + { + return new SingleStringSearchValuesPackedThreeChars(uniqueValues, value, ch2Offset, ch3Offset); + } + + return new SingleStringSearchValuesThreeChars(uniqueValues, value, ch2Offset, ch3Offset); + + // Unlike with PackedSpanHelpers (Sse2 only), we are also using this approach on ARM64. + // We use PackUnsignedSaturate on X86 and UnzipEven on ARM, so the set of allowed characters differs slightly (we can't use it for \0 and \xFF on X86). + static bool CanUsePackedImpl(char c) => + PackedSpanHelpers.PackedIndexOfIsSupported ? PackedSpanHelpers.CanUsePackedIndexOf(c) : + (AdvSimd.Arm64.IsSupported && c <= byte.MaxValue); + } + private static void AnalyzeValues( ReadOnlySpan values, ref bool ignoreCase,