Skip to content

Commit bb0d966

Browse files
committed
Fix capture transfer edges
1 parent 0281e56 commit bb0d966

File tree

2 files changed

+122
-93
lines changed

2 files changed

+122
-93
lines changed

spec/recursion-spec.js

Lines changed: 76 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ describe('recursion', () => {
173173
});
174174
});
175175

176-
it('should transfer to capture preceding recursion', () => {
176+
it('should transfer to capture that precedes the recursion', () => {
177177
expect(recursion(r`()(()(a)()\g<2&R=2>?b)`, {
178178
captureTransfers: new Map([[1, 4]]),
179179
hiddenCaptures: [4],
@@ -188,7 +188,7 @@ describe('recursion', () => {
188188
})).toEqual({
189189
pattern: '()(a(?:a(?:)?()(b)())?()(b)())',
190190
captureTransfers: new Map([[1, 7]]),
191-
hiddenCaptures: [7, 3, 4, 5],
191+
hiddenCaptures: [7, 3, 4, 5], // unsorted
192192
});
193193
});
194194

@@ -203,30 +203,51 @@ describe('recursion', () => {
203203
});
204204

205205
it('should transfer across multiple recursions', () => {
206+
// Capture in left contents of recursions
206207
expect(recursion(r`(?<r>(a)\g<r&R=2>?b) ((a)\g<3&R=2>?b)`, {
207208
captureTransfers: new Map([[1, 3], ['r', 3], [2, 4]]),
208209
})).toEqual({
209210
pattern: '(?<r>(a)(?:(a)(?:)?b)?b) ((a)(?:(a)(?:)?b)?b)',
210211
captureTransfers: new Map([[1, 4], ['r', 4], [2, 6]]),
211212
hiddenCaptures: [3, 6],
212213
});
214+
// Capture in right contents of recursions
213215
expect(recursion(r`(?<r>a\g<r&R=2>?(b)) (a\g<3&R=2>?(b))`, {
214216
captureTransfers: new Map([[1, 3], ['r', 3], [2, 4]]),
215217
})).toEqual({
216218
pattern: '(?<r>a(?:a(?:)?(b))?(b)) (a(?:a(?:)?(b))?(b))',
217219
captureTransfers: new Map([[1, 4], ['r', 4], [3, 6]]),
218220
hiddenCaptures: [2, 5],
219221
});
222+
// Capture in left and right contents of recursions
220223
expect(recursion(r`(?<r>(a)\g<r&R=2>?(b)) ((a)\g<4&R=2>?(b))`, {
221224
captureTransfers: new Map([[1, 4], ['r', 4], [2, 5], [3, 6]]),
222225
})).toEqual({
223226
pattern: '(?<r>(a)(?:(a)(?:)?(b))?(b)) ((a)(?:(a)(?:)?(b))?(b))',
224227
captureTransfers: new Map([[1, 6], ['r', 6], [2, 8], [5, 10]]),
225228
hiddenCaptures: [3, 4, 8, 9],
226229
});
230+
// Triple recursion with capture transfer to middle (Oniguruma: `\g<a> (?<a>a\g<b>?b) (?<b>c\g<a>?d)`)
231+
expect(recursion(r`(a(c\g<1&R=2>?d)?b) (?<a>a(c\g<3&R=2>?d)?b) (?<b>c(a\g<5&R=2>?b)?d)`, {
232+
captureTransfers: new Map([[3, 6], ['a', 6]]),
233+
hiddenCaptures: [1, 2, 4, 6],
234+
})).toEqual({
235+
pattern: '(a(c(?:a(c(?:)?d)?b)?d)?b) (?<a>a(c(?:a(c(?:)?d)?b)?d)?b) (?<b>c(a(?:c(a(?:)?b)?d)?b)?d)',
236+
captureTransfers: new Map([[4, 9],['a', 9]]),
237+
hiddenCaptures: [1, 2, 5, 8, 3, 6, 9], // unsorted
238+
});
239+
// Same as above but with depth 3
240+
expect(recursion(r`(a(c\g<1&R=3>?d)?b) (?<a>a(c\g<3&R=3>?d)?b) (?<b>c(a\g<5&R=3>?b)?d)`, {
241+
captureTransfers: new Map([[3, 6], ['a', 6]]),
242+
hiddenCaptures: [1, 2, 4, 6],
243+
})).toEqual({
244+
pattern: '(a(c(?:a(c(?:a(c(?:)?d)?b)?d)?b)?d)?b) (?<a>a(c(?:a(c(?:a(c(?:)?d)?b)?d)?b)?d)?b) (?<b>c(a(?:c(a(?:c(a(?:)?b)?d)?b)?d)?b)?d)',
245+
captureTransfers: new Map([[5, 12],['a', 12]]),
246+
hiddenCaptures: [1, 2, 6, 10, 3, 4, 7, 8, 11, 12], // unsorted
247+
});
227248
});
228249

229-
it('should transfer for captures after recursion', () => {
250+
it('should transfer between captures following recursion', () => {
230251
expect(recursion(r`((2)\g<1&R=2>?) (3) (4)`, {
231252
captureTransfers: new Map([[3, 4]]),
232253
})).toEqual({
@@ -237,64 +258,64 @@ describe('recursion', () => {
237258
});
238259
});
239260
});
261+
});
240262

241-
describe('readme examples', () => {
242-
it('should match an equal number of two different subpatterns', () => {
243-
const re = regex({plugins: [recursion]})`a(?R=20)?b`;
244-
expect(re.exec('test aaaaaabbb')[0]).toBe('aaabbb');
245-
});
263+
describe('readme examples', () => {
264+
it('should match an equal number of two different subpatterns', () => {
265+
const re = regex({plugins: [recursion]})`a(?R=20)?b`;
266+
expect(re.exec('test aaaaaabbb')[0]).toBe('aaabbb');
267+
});
246268

247-
it('should match an equal number of two different subpatterns, as the entire string', () => {
248-
const re = regex({plugins: [recursion]})`
249-
^ (?<r> a \g<r&R=20>? b) $
250-
`;
251-
expect(re.test('aaabbb')).toBeTrue();
252-
expect(re.test('aaabb')).toBeFalse();
253-
});
269+
it('should match an equal number of two different subpatterns, as the entire string', () => {
270+
const re = regex({plugins: [recursion]})`
271+
^ (?<r> a \g<r&R=20>? b) $
272+
`;
273+
expect(re.test('aaabbb')).toBeTrue();
274+
expect(re.test('aaabb')).toBeFalse();
275+
});
254276

255-
it('should match balanced parentheses', () => {
256-
const parens = regex({flags: 'g', plugins: [recursion]})`
257-
\( ([^\(\)] | (?R=20))* \)
258-
`;
259-
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
260-
});
277+
it('should match balanced parentheses', () => {
278+
const parens = regex({flags: 'g', plugins: [recursion]})`
279+
\( ([^\(\)] | (?R=20))* \)
280+
`;
281+
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
282+
});
261283

262-
it('should match balanced parentheses using an atomic group', () => {
263-
const parens = regex({flags: 'g', plugins: [recursion]})`
264-
\( ((?> [^\(\)]+) | (?R=20))* \)
265-
`;
266-
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
267-
});
284+
it('should match balanced parentheses using an atomic group', () => {
285+
const parens = regex({flags: 'g', plugins: [recursion]})`
286+
\( ((?> [^\(\)]+) | (?R=20))* \)
287+
`;
288+
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
289+
});
268290

269-
it('should match balanced parentheses using a possessive quantifier', () => {
270-
const parens = regex({flags: 'g', plugins: [recursion]})`
271-
\( ([^\(\)]++ | (?R=20))* \)
272-
`;
273-
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
274-
});
291+
it('should match balanced parentheses using a possessive quantifier', () => {
292+
const parens = regex({flags: 'g', plugins: [recursion]})`
293+
\( ([^\(\)]++ | (?R=20))* \)
294+
`;
295+
expect('test ) (balanced ((parens))) () ((a)) ( (b)'.match(parens)).toEqual(['(balanced ((parens)))', '()', '((a))', '(b)']);
296+
});
275297

276-
it('should match palindromes', () => {
277-
const palindromes = regex({flags: 'gi', plugins: [recursion]})`
278-
(?<char> \w)
279-
# Recurse, or match a lone unbalanced char in the middle
280-
((?R=15) | \w?)
281-
\k<char>
282-
`;
283-
expect('Racecar, ABBA, and redivided'.match(palindromes)).toEqual(['Racecar', 'ABBA', 'edivide']);
284-
});
298+
it('should match palindromes', () => {
299+
const palindromes = regex({flags: 'gi', plugins: [recursion]})`
300+
(?<char> \w)
301+
# Recurse, or match a lone unbalanced char in the middle
302+
((?R=15) | \w?)
303+
\k<char>
304+
`;
305+
expect('Racecar, ABBA, and redivided'.match(palindromes)).toEqual(['Racecar', 'ABBA', 'edivide']);
306+
});
285307

286-
it('should match palindromes as complete words', () => {
287-
const palindromeWords = regex({flags: 'gi', plugins: [recursion]})`
288-
\b
289-
(?<palindrome>
290-
(?<char> \w )
291-
# Recurse, or match a lone unbalanced char in the center
292-
( \g<palindrome&R=15> | \w? )
293-
\k<char>
294-
)
295-
\b
296-
`;
297-
expect('Racecar, ABBA, and redivided'.match(palindromeWords)).toEqual(['Racecar', 'ABBA']);
298-
});
308+
it('should match palindromes as complete words', () => {
309+
const palindromeWords = regex({flags: 'gi', plugins: [recursion]})`
310+
\b
311+
(?<palindrome>
312+
(?<char> \w )
313+
# Recurse, or match a lone unbalanced char in the center
314+
( \g<palindrome&R=15> | \w? )
315+
\k<char>
316+
)
317+
\b
318+
`;
319+
expect('Racecar, ABBA, and redivided'.match(palindromeWords)).toEqual(['Racecar', 'ABBA']);
299320
});
300321
});

src/index.js

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,16 @@ function recursion(pattern, data) {
7979
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with global recursion`
8080
);
8181
}
82-
const pre = pattern.slice(0, match.index);
83-
const post = pattern.slice(token.lastIndex);
84-
if (hasUnescaped(post, recursiveToken, Context.DEFAULT)) {
82+
const left = pattern.slice(0, match.index);
83+
const right = pattern.slice(token.lastIndex);
84+
if (hasUnescaped(right, recursiveToken, Context.DEFAULT)) {
8585
throw new Error(overlappingRecursionMsg);
8686
}
87+
const reps = +rDepth - 1;
8788
pattern = makeRecursive(
88-
pre,
89-
post,
90-
+rDepth,
89+
left,
90+
right,
91+
reps,
9192
false,
9293
hiddenCaptures,
9394
addedHiddenCaptures,
@@ -96,7 +97,8 @@ function recursion(pattern, data) {
9697
captureTransfers = mapCaptureTransfers(
9798
captureTransfers,
9899
numCapturesPassed,
99-
pre,
100+
left,
101+
reps,
100102
addedHiddenCaptures.length,
101103
0
102104
);
@@ -132,13 +134,14 @@ function recursion(pattern, data) {
132134
`${mode === 'external' ? 'Backrefs' : 'Numbered backrefs'} cannot be used with recursion of capturing groups`
133135
);
134136
}
135-
const groupContentsPre = pattern.slice(startPos, match.index);
136-
const groupContentsPost = groupContents.slice(groupContentsPre.length + m.length);
137+
const groupContentsLeft = pattern.slice(startPos, match.index);
138+
const groupContentsRight = groupContents.slice(groupContentsLeft.length + m.length);
137139
const numAddedHiddenCapturesPreExpansion = addedHiddenCaptures.length;
140+
const reps = +gRDepth - 1;
138141
const expansion = makeRecursive(
139-
groupContentsPre,
140-
groupContentsPost,
141-
+gRDepth,
142+
groupContentsLeft,
143+
groupContentsRight,
144+
reps,
142145
true,
143146
hiddenCaptures,
144147
addedHiddenCaptures,
@@ -147,7 +150,8 @@ function recursion(pattern, data) {
147150
captureTransfers = mapCaptureTransfers(
148151
captureTransfers,
149152
numCapturesPassed,
150-
groupContentsPre,
153+
groupContentsLeft,
154+
reps,
151155
addedHiddenCaptures.length - numAddedHiddenCapturesPreExpansion,
152156
numAddedHiddenCapturesPreExpansion
153157
);
@@ -156,7 +160,7 @@ function recursion(pattern, data) {
156160
// Modify the string we're looping over
157161
pattern = `${pre}${expansion}${post}`;
158162
// Step forward for the next loop iteration
159-
token.lastIndex += expansion.length - m.length - groupContentsPre.length - groupContentsPost.length;
163+
token.lastIndex += expansion.length - m.length - groupContentsLeft.length - groupContentsRight.length;
160164
openGroups.forEach(g => g.hasRecursedWithin = true);
161165
hasRecursed = true;
162166
} else if (captureName) {
@@ -207,19 +211,19 @@ function assertMaxInBounds(max) {
207211
}
208212

209213
/**
210-
@param {string} pre
211-
@param {string} post
212-
@param {number} maxDepth
214+
@param {string} left
215+
@param {string} right
216+
@param {number} reps
213217
@param {boolean} isSubpattern
214218
@param {Array<number>} hiddenCaptures
215219
@param {Array<number>} addedHiddenCaptures
216220
@param {number} numCapturesPassed
217221
@returns {string}
218222
*/
219223
function makeRecursive(
220-
pre,
221-
post,
222-
maxDepth,
224+
left,
225+
right,
226+
reps,
223227
isSubpattern,
224228
hiddenCaptures,
225229
addedHiddenCaptures,
@@ -228,25 +232,25 @@ function makeRecursive(
228232
const namesInRecursed = new Set();
229233
// Can skip this work if not needed
230234
if (isSubpattern) {
231-
forEachUnescaped(pre + post, namedCaptureDelim, ({groups: {captureName}}) => {
235+
forEachUnescaped(left + right, namedCaptureDelim, ({groups: {captureName}}) => {
232236
namesInRecursed.add(captureName);
233237
}, Context.DEFAULT);
234238
}
235239
const rest = [
236-
maxDepth - 1, // reps
237-
isSubpattern ? namesInRecursed : null, // namesInRecursed
240+
reps,
241+
isSubpattern ? namesInRecursed : null,
238242
hiddenCaptures,
239243
addedHiddenCaptures,
240244
numCapturesPassed,
241245
];
242-
// Depth 2: 'pre(?:pre(?:)post)post'
243-
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post'
246+
// Depth 2: 'left(?:left(?:)right)right'
247+
// Depth 3: 'left(?:left(?:left(?:)right)right)right'
244248
// Empty group in the middle separates tokens and absorbs a following quantifier if present
245-
return `${pre}${
246-
repeatWithDepth(`(?:${pre}`, 'forward', ...rest)
249+
return `${left}${
250+
repeatWithDepth(`(?:${left}`, 'forward', ...rest)
247251
}(?:)${
248-
repeatWithDepth(`${post})`, 'backward', ...rest)
249-
}${post}`;
252+
repeatWithDepth(`${right})`, 'backward', ...rest)
253+
}${right}`;
250254
}
251255

252256
/**
@@ -312,28 +316,32 @@ function incrementIfAtLeast(arr, threshold) {
312316
/**
313317
@param {Map<number | string, number>} captureTransfers
314318
@param {number} numCapturesPassed
315-
@param {string} leftContents
319+
@param {string} left
320+
@param {number} reps
316321
@param {number} numCapturesAddedInExpansion
317322
@param {number} numAddedHiddenCapturesPreExpansion
318323
@returns {Map<number | string, number>}
319324
*/
320-
function mapCaptureTransfers(captureTransfers, numCapturesPassed, leftContents, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion) {
325+
function mapCaptureTransfers(captureTransfers, numCapturesPassed, left, reps, numCapturesAddedInExpansion, numAddedHiddenCapturesPreExpansion) {
321326
if (captureTransfers.size && numCapturesAddedInExpansion) {
322-
let numCapturesInLeftContents = 0;
323-
forEachUnescaped(leftContents, captureDelim, () => numCapturesInLeftContents++, Context.DEFAULT);
324-
const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeftContents + numAddedHiddenCapturesPreExpansion;
327+
let numCapturesInLeft = 0;
328+
forEachUnescaped(left, captureDelim, () => numCapturesInLeft++, Context.DEFAULT);
329+
const recursionDelimCaptureNum = numCapturesPassed - numCapturesInLeft + numAddedHiddenCapturesPreExpansion; // 0 for global
325330
const newCaptureTransfers = new Map();
326331
captureTransfers.forEach((/** @type {number} */ from, /** @type {number | string} */ to) => {
332+
// `to` can be a group number or name
333+
if (to > (numCapturesPassed + numAddedHiddenCapturesPreExpansion)) {
334+
to += numCapturesAddedInExpansion;
335+
}
327336
if (from > recursionDelimCaptureNum) {
328337
from += (
329338
// if capture is on left side of expanded group
330-
from <= (recursionDelimCaptureNum + numCapturesInLeftContents) ?
331-
numCapturesInLeftContents :
339+
from <= (recursionDelimCaptureNum + numCapturesInLeft) ?
340+
numCapturesInLeft * reps :
332341
numCapturesAddedInExpansion
333342
);
334343
}
335-
// `to` can be a group number or name
336-
newCaptureTransfers.set((to > numCapturesPassed ? to + numCapturesAddedInExpansion : to), from);
344+
newCaptureTransfers.set(to, from);
337345
});
338346
return newCaptureTransfers;
339347
}

0 commit comments

Comments
 (0)