Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/libraries/System.Memory/tests/Span/StringSearchValues.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,11 @@ void TestWithPoisonPages(PoisonPagePlacement poisonPlacement, int haystackLength
.First(c => !values.AsSpan().ContainsAny(c, char.ToLowerInvariant(c)));
}

TestWithDifferentMarkerChars(haystack, '\0');
if (!values.Contains('\0'))
{
TestWithDifferentMarkerChars(haystack, '\0');
}

TestWithDifferentMarkerChars(haystack, '\u00FC');
TestWithDifferentMarkerChars(haystack, asciiNumberNotInSet);
TestWithDifferentMarkerChars(haystack, asciiLetterLowerNotInSet);
Expand Down Expand Up @@ -407,10 +411,26 @@ public static void SimpleIndexOfAnyValues(params string[] valuesArray)
valuesArray[offset] = $"{original[0]}\u00F6{original.AsSpan(1)}";
TestCore(valuesArray);

// Test non-ASCII values over 0xFF
valuesArray[offset] = $"{original}\u2049";
TestCore(valuesArray);

valuesArray[offset] = $"\u2049{original}";
TestCore(valuesArray);

valuesArray[offset] = $"{original[0]}\u2049{original.AsSpan(1)}";
TestCore(valuesArray);

// Test null chars in values
valuesArray[offset] = $"{original[0]}\0{original.AsSpan(1)}";
TestCore(valuesArray);

valuesArray[offset] = $"\0{original}";
TestCore(valuesArray);

valuesArray[offset] = $"{original}\0";
TestCore(valuesArray);

static void TestCore(string[] valuesArray)
{
Values_ImplementsSearchValuesBase(StringComparison.Ordinal, valuesArray);
Expand Down Expand Up @@ -529,7 +549,7 @@ public static void TestIndexOfAny_RandomInputs_Stress()
if (RemoteExecutor.IsSupported && Avx512F.IsSupported)
{
var psi = new ProcessStartInfo();
psi.Environment.Add("DOTNET_EnableAVX512F", "0");
psi.Environment.Add("DOTNET_EnableAVX512", "0");
RemoteExecutor.Invoke(RunStress, new RemoteInvokeOptions { StartInfo = psi, TimeOut = 10 * 60 * 1000 }).Dispose();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyNonBucketizedN3.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\AsciiStringSearchValuesTeddyBase.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\MultiStringIgnoreCaseSearchValuesFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\SingleStringSearchValuesPackedThreeChars.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\SingleStringSearchValuesThreeChars.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\SingleStringSearchValuesFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Strings\StringSearchValues.cs" />
Expand Down Expand Up @@ -2462,10 +2463,8 @@
<Compile Include="$(CommonPath)Interop\Unix\System.Native\Interop.MountPoints.cs">
<Link>Common\Interop\Unix\System.Native\Interop.MountPoints.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Linux\procfs\Interop.ProcMountInfo.cs"
Link="Common\Interop\Linux\procfs\Interop.ProcMountInfo.cs" />
<Compile Include="$(CommonPath)Interop\Linux\procfs\Interop.ProcMountInfo.TryParseMountInfoLine.cs"
Link="Common\Interop\Linux\procfs\Interop.ProcMountInfo.TryParseMountInfoLine.cs" />
<Compile Include="$(CommonPath)Interop\Linux\procfs\Interop.ProcMountInfo.cs" Link="Common\Interop\Linux\procfs\Interop.ProcMountInfo.cs" />
<Compile Include="$(CommonPath)Interop\Linux\procfs\Interop.ProcMountInfo.TryParseMountInfoLine.cs" Link="Common\Interop\Linux\procfs\Interop.ProcMountInfo.TryParseMountInfoLine.cs" />
<Compile Include="$(CommonPath)Interop\Unix\System.Native\Interop.Open.cs">
<Link>Common\Interop\Unix\System.Native\Interop.Open.cs</Link>
</Compile>
Expand Down Expand Up @@ -2879,4 +2878,4 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Threading\Wasi\WasiPollWorld.wit.imports.wasi.io.v0_2_0.IPoll.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Threading\Wasi\WasiPollWorld.wit.imports.wasi.io.v0_2_0.PollInterop.cs" />
</ItemGroup>
</Project>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ namespace System.Buffers
//
// For an alternative description of the algorithm, see
// https://github.com/BurntSushi/aho-corasick/blob/8d735471fc12f0ca570cead8e17342274fae6331/src/packed/teddy/README.md
// Has an O(i * m) worst-case, with the expected time closer to O(n) for good bucket distributions.
// Has an O(i * m) worst-case, with the expected time closer to O(i) for good bucket distributions.
internal abstract class AsciiStringSearchValuesTeddyBase<TBucketized, TStartCaseSensitivity, TCaseSensitivity> : StringSearchValuesRabinKarp<TCaseSensitivity>
where TBucketized : struct, SearchValues.IRuntimeConst
where TStartCaseSensitivity : struct, ICaseSensitivity // Refers to the characters being matched by Teddy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Text;

namespace System.Buffers
Expand Down Expand Up @@ -270,12 +271,31 @@ public static bool Equals<TValueLength>(ref char matchStart, ref readonly Single
else
{
Debug.Assert(state.Value.Length is 2 or 3);
Debug.Assert(matchStart == state.Value[0], "This should only be called after the first character has been checked");

// We know that the candidate is 2 or 3 characters long, and that the first character has already been checked.
// We only have to check whether the last 2 characters also match.
ref byte matchByteStart = ref Unsafe.As<char, byte>(ref matchStart);
return Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) == state.Value32_1;

if (AdvSimd.IsSupported)
{
// See comments on SingleStringSearchValuesPackedThreeChars.CanSkipAnchorMatchVerification.
// When running on Arm64, this helper is also used to confirm vectorized anchor matches.
// We do so because we're using UnzipEven when packing inputs, which may produce false positive anchor matches.
// When called from SingleStringSearchValuesThreeChars (non-packed), we could skip to the else branch instead.
Debug.Assert(matchStart == state.Value[0] || (matchStart & 0xFF) == state.Value[0]);

uint differentBits = Unsafe.ReadUnaligned<uint>(ref matchByteStart) - state.Value32_0;
differentBits |= Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) - state.Value32_1;
return differentBits == 0;
}
else
{
// Otherwise, this path is not used when confirming vectorized anchor matches.
// It's only used as part of the scalar search loop, which always checks that the first character matches before calling this helper.
// We know that the candidate is 2 or 3 characters long, and that the first character has already been checked.
// We only have to check whether the last 2 characters also match.
Debug.Assert(matchStart == state.Value[0], "This should only be called after the first character has been checked");

return Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) == state.Value32_1;
}
}
}
}
Expand Down Expand Up @@ -319,13 +339,32 @@ public static bool Equals<TValueLength>(ref char matchStart, ref readonly Single
else
{
Debug.Assert(state.Value.Length is 2 or 3);
Debug.Assert(TransformInput(matchStart) == state.Value[0], "This should only be called after the first character has been checked");

// We know that the candidate is 2 or 3 characters long, and that the first character has already been checked.
// We only have to check whether the last 2 characters also match.
const uint CaseMask = ~0x200020u;
ref byte matchByteStart = ref Unsafe.As<char, byte>(ref matchStart);
return (Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) == state.Value32_1;

if (AdvSimd.IsSupported)
{
// See comments on SingleStringSearchValuesPackedThreeChars.CanSkipAnchorMatchVerification.
// When running on Arm64, this helper is also used to confirm vectorized anchor matches.
// We do so because we're using UnzipEven when packing inputs, which may produce false positive anchor matches.
// When called from SingleStringSearchValuesThreeChars (non-packed), we could skip to the else branch instead.
Debug.Assert(TransformInput((char)(matchStart & 0xFF)) == state.Value[0]);

uint differentBits = (Unsafe.ReadUnaligned<uint>(ref matchByteStart) & CaseMask) - state.Value32_0;
differentBits |= (Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) - state.Value32_1;
return differentBits == 0;
}
else
{
// Otherwise, this path is not used when confirming vectorized anchor matches.
// It's only used as part of the scalar search loop, which always checks that the first character matches before calling this helper.
// We know that the candidate is 2 or 3 characters long, and that the first character has already been checked.
// We only have to check whether the last 2 characters also match.
Debug.Assert(TransformInput(matchStart) == state.Value[0], "This should only be called after the first character has been checked");

return (Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref matchByteStart, state.SecondReadByteOffset)) & CaseMask) == state.Value32_1;
}
}
}
}
Expand Down Expand Up @@ -392,7 +431,6 @@ public static bool Equals<TValueLength>(ref char matchStart, ref readonly Single
else
{
Debug.Assert(state.Value.Length is 2 or 3);
Debug.Assert((matchStart & ~0x20) == (state.Value[0] & ~0x20));

ref byte matchByteStart = ref Unsafe.As<char, byte>(ref matchStart);
uint differentBits = (Unsafe.ReadUnaligned<uint>(ref matchByteStart) & state.ToUpperMask32_0) - state.Value32_0;
Expand Down
Loading
Loading