Skip to content

Commit b54bfdd

Browse files
authored
Some more cleanup to regex NonBacktracking (#104766)
* Rent object[] rather than (uint,uint)[][] from the ArrayPool * Remove unnecessary TInputReader generic from functions * Add more comments and do some renames * Remove unused TFindOptimizationsHandler from FindEndPositionDeltasNFA * Fix a stray input reader * Some more renames * Avoid duplicated reads of input character and nullability info * Remove initialStateId from TryFindNextStartingPosition and make initial accelerators more similar * Remove unused initialStatePos / initialStatePosCandidate It's only ever written and not actually used for anything. * Remove unnecessary generic args and remove resulting dead code Multiple XxDfa / XxNfa methods took a TStateHandler, but it was only ever DfaStateHandler for XxDfa or NfaStateHandler for XxNfa. We can just use the types directly in those methods, rather than generically parameterizing. Doing that revealed all but one of the members of IStateHandler weren't needed on the interface. And removing those revealed a bunch of dead code on DfaStateHandler/NfaStateHandler, which were removed, as well as arguments to some methods that weren't used. * Put GetStateFlags back in IStateHandler and use it to avoid duplication at call sites * Put out argument last in TryCreateNewTransition * Store state to local in FindStartPositionDeltasDFA * Merge IAcceleratedStateHandler into IInitialStateHandler * Remove MintermClassifier.IntLookup
1 parent 9b09bcf commit b54bfdd

File tree

5 files changed

+263
-340
lines changed

5 files changed

+263
-340
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MintermClassifier.cs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,11 @@ public MintermClassifier(BDD[] minterms)
4747
// in order to size the lookup array to minimize steady-state memory consumption of the potentially
4848
// large lookup array. We prefer to use the byte[] _lookup when possible, in order to keep memory
4949
// consumption to a minimum; doing so accomodates up to 255 minterms, which is the vast majority case.
50-
// However, when there are more than 255 minterms, we need to use int[] _intLookup.
51-
(uint, uint)[][] charRangesPerMinterm = ArrayPool<(uint, uint)[]>.Shared.Rent(minterms.Length);
50+
// However, when there are more than 255 minterms, we need to use int[] _intLookup. We rent an object[]
51+
// rather than a (uint,uint)[][] to avoid the extra type pressure on the ArrayPool (object[]s are common,
52+
// (uint,uint)[][]s much less so).
53+
object[] arrayPoolArray = ArrayPool<object>.Shared.Rent(minterms.Length);
54+
Span<object> charRangesPerMinterm = arrayPoolArray.AsSpan(0, minterms.Length);
5255

5356
int maxChar = -1;
5457
for (int mintermId = 1; mintermId < minterms.Length; mintermId++)
@@ -70,17 +73,17 @@ public MintermClassifier(BDD[] minterms)
7073
}
7174

7275
// Return the rented array. We clear it before returning it in order to avoid all the ranges arrays being kept alive.
73-
Array.Clear(charRangesPerMinterm, 0, minterms.Length);
74-
ArrayPool<(uint, uint)[]>.Shared.Return(charRangesPerMinterm);
76+
charRangesPerMinterm.Clear();
77+
ArrayPool<object>.Shared.Return(arrayPoolArray);
7578

76-
// Creates the lookup array.
77-
static T[] CreateLookup<T>(BDD[] minterms, ReadOnlySpan<(uint, uint)[]> charRangesPerMinterm, int _maxChar) where T : IBinaryInteger<T>
79+
// Creates the lookup array. charRangesPerMinterm needs to have already been populated with (uint, uint)[] instances.
80+
static T[] CreateLookup<T>(BDD[] minterms, ReadOnlySpan<object> charRangesPerMinterm, int _maxChar) where T : IBinaryInteger<T>
7881
{
7982
T[] lookup = new T[_maxChar + 1];
8083
for (int mintermId = 1; mintermId < minterms.Length; mintermId++)
8184
{
8285
// Each minterm maps to a range of characters. Set each of the characters in those ranges to the corresponding minterm.
83-
foreach ((uint start, uint end) in charRangesPerMinterm[mintermId])
86+
foreach ((uint start, uint end) in ((uint, uint)[])charRangesPerMinterm[mintermId])
8487
{
8588
lookup.AsSpan((int)start, (int)(end + 1 - start)).Fill(T.CreateTruncating(mintermId));
8689
}
@@ -101,7 +104,9 @@ public int GetMintermID(int c)
101104
}
102105
else
103106
{
104-
int[] lookup = _intLookup!;
107+
Debug.Assert(_intLookup is not null);
108+
109+
int[] lookup = _intLookup;
105110
return (uint)c < (uint)lookup.Length ? lookup[c] : 0;
106111
}
107112
}
@@ -111,12 +116,6 @@ public int GetMintermID(int c)
111116
/// </summary>
112117
public byte[]? ByteLookup => _lookup;
113118

114-
/// <summary>
115-
/// Gets a mapping from char to minterm for the rare case when there are &gt;= 255 minterms.
116-
/// Null in the common case where there are fewer than 255 minterms.
117-
/// </summary>
118-
public int[]? IntLookup => _intLookup;
119-
120119
/// <summary>
121120
/// Maximum ordinal character for a non-0 minterm, used to conserve memory
122121
/// </summary>

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ private static void ArrayResizeAndVolatilePublish<T>(ref T[] array, int newSize)
120120
/// Pre-computed hot-loop version of nullability check
121121
/// </summary>
122122
[MethodImpl(MethodImplOptions.AggressiveInlining)]
123-
private bool IsNullableWithContext(int stateId, int mintermId) =>
124-
(_nullabilityArray[stateId] & (1 << (int)GetPositionKind(mintermId))) > 0;
123+
private bool IsNullableWithContext(byte stateNullability, int mintermId) =>
124+
(stateNullability & (1 << (int)GetPositionKind(mintermId))) != 0;
125125

126126
/// <summary>Returns the span from <see cref="_dfaDelta"/> that may contain transitions for the given state</summary>
127127
private Span<int> GetDeltasFor(MatchingState<TSet> state)
@@ -355,9 +355,7 @@ private int GetCoreStateId(int nfaStateId)
355355

356356
/// <summary>Gets or creates a new DFA transition.</summary>
357357
/// <remarks>This function locks the matcher for safe concurrent use of the <see cref="_builder"/></remarks>
358-
private bool TryCreateNewTransition(
359-
MatchingState<TSet> sourceState, int mintermId, int offset, bool checkThreshold, [NotNullWhen(true)] out MatchingState<TSet>? nextState,
360-
long timeoutOccursAt = 0)
358+
private bool TryCreateNewTransition(MatchingState<TSet> sourceState, int mintermId, int offset, bool checkThreshold, long timeoutOccursAt, [NotNullWhen(true)] out MatchingState<TSet>? nextState)
361359
{
362360
Debug.Assert(offset < _dfaDelta.Length);
363361
lock (this)

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,22 @@ public override void Explore(bool includeDotStarred, bool includeReverse, bool i
3535
{
3636
// Don't dequeue yet, because a transition might fail
3737
MatchingState<TSet> state = toExplore.Peek();
38+
3839
// Include the special minterm for the last end-of-line if the state is sensitive to it
3940
int maxMinterm = state.StartsWithLineAnchor ? _minterms!.Length : _minterms!.Length - 1;
41+
4042
// Explore successor states for each minterm
4143
for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId)
4244
{
4345
int offset = DeltaOffset(state.Id, mintermId);
44-
if (!TryCreateNewTransition(state, mintermId, offset, true, out MatchingState<TSet>? nextState))
46+
if (!TryCreateNewTransition(state, mintermId, offset, true, 0, out MatchingState<TSet>? nextState))
47+
{
4548
goto DfaLimitReached;
49+
}
50+
4651
EnqueueIfUnseen(nextState, seen, toExplore);
4752
}
53+
4854
// Safe to dequeue now that the state has been completely handled
4955
toExplore.Dequeue();
5056
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public override IEnumerable<string> SampleMatches(int k, int randomseed)
7171
NfaMatchingState states = new();
7272
// Here one could also consider previous characters for example for \b, \B, and ^ anchors
7373
// and initialize inputSoFar accordingly
74-
states.InitializeFrom(this, _initialStates[GetCharKind<FullInputReader>([], -1)]);
74+
states.InitializeFrom(this, _initialStates[GetCharKind([], -1)]);
7575
CurrentState statesWrapper = new(states);
7676

7777
// Used for end suffixes

0 commit comments

Comments
 (0)