Skip to content

Commit 2131a00

Browse files
committed
Fix backrefs in recursion that refer to groups outside the recursed pattern
1 parent 4966612 commit 2131a00

File tree

2 files changed

+31
-13
lines changed

2 files changed

+31
-13
lines changed

spec/recursion-spec.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ describe('recursion', () => {
5050
expect('Racecar, ABBA, and redivided'.match(palindromeWords)).toEqual(['Racecar', 'ABBA']);
5151
});
5252

53+
it('should not adjust named backreferences referring outside of the recursed expression', () => {
54+
expect('aababbabcc').toMatch(rregex`^(?<a>a)\k<a>(?<r>(?<b>b)\k<a>\k<b>\k<c>\g<r&R=2>?)(?<c>c)\k<c>$`);
55+
});
56+
5357
it('should allow directly using recursion as a plugin with tag regex', () => {
5458
expect('aAbb').toMatch(regex({flags: 'i', plugins: [recursion]})`a(?R=2)?b`);
5559
});

src/index.js

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {regex} from 'regex';
2-
import {Context, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
2+
import {Context, forEachUnescaped, getGroupContents, hasUnescaped, replaceUnescaped} from 'regex-utilities';
33

44
export function rregex(first, ...values) {
55
const plugins = (first?.plugins || []).concat(recursion);
@@ -30,16 +30,19 @@ export function recursion(expression) {
3030
return expression;
3131
}
3232
if (hasUnescaped(expression, String.raw`\\[1-9]`, Context.DEFAULT)) {
33-
// TODO: Add support for numbered backrefs by automatically adjusting them when they're
34-
// duplicated by recursion and refer to a group inside the expression being recursed. To
35-
// trigger this error, the regex must use recursion and include one of:
33+
// Could add support for numbered backrefs with extra effort, but it's probably not worth it.
34+
// To trigger this error, the regex must include recursion and one of the following:
3635
// - An interpolated regex that contains a numbered backref (since other numbered backrefs are
3736
// prevented by implicit flag n).
38-
// - A numbered backref, and flag n is explicitly disabled.
39-
// Note that `regex`'s extended syntax (atomic groups and sometimes subroutines) can add
40-
// numbered backrefs. However, those work fine because external plugins run *before* the
41-
// transpilation of built-in syntax extensions
42-
throw new Error(`Numbered backreferences cannot be used with recursion`);
37+
// - A numbered backref, when flag n is explicitly disabled.
38+
// Note that `regex`'s extended syntax (atomic groups and sometimes subroutines) can also add
39+
// numbered backrefs, but those work fine because external plugins like this one run *before*
40+
// the transpilation of built-in syntax extensions.
41+
// To support numbered backrefs, they would need to be automatically adjusted when they're
42+
// duplicated by recursion and refer to a group inside the expression being recursed.
43+
// Additionally, numbered backrefs inside and outside of the recursed expression would need to
44+
// be adjusted based on any capturing groups added by recursion.
45+
throw new Error(`Numbered backrefs cannot be used with recursion; use named backref instead`);
4346
}
4447
if (hasUnescaped(expression, String.raw`\(\?\(DEFINE\)`, Context.DEFAULT)) {
4548
throw new Error(`DEFINE groups cannot be used with recursion`);
@@ -116,29 +119,40 @@ function assertNoFollowingRecursion(remainingExpression) {
116119
@returns {string}
117120
*/
118121
function makeRecursive(pre, post, maxDepth) {
122+
const namesInRecursed = new Set();
123+
forEachUnescaped(pre + post, namedCapturingDelim, ({groups: {captureName}}) => {
124+
namesInRecursed.add(captureName);
125+
}, Context.DEFAULT);
119126
const reps = maxDepth - 1;
120127
// Depth 2: 'pre(?:pre(?:)post)post'
121128
// Depth 3: 'pre(?:pre(?:pre(?:)post)post)post'
122-
return `${pre}${repeatWithDepth(`(?:${pre}`, reps)}(?:)${repeatWithDepth(`${post})`, reps, 'backward')}${post}`;
129+
return `${pre}${
130+
repeatWithDepth(`(?:${pre}`, reps, namesInRecursed)
131+
}(?:)${
132+
repeatWithDepth(`${post})`, reps, namesInRecursed, 'backward')
133+
}${post}`;
123134
}
124135

125136
/**
126137
@param {string} expression
127138
@param {number} reps
139+
@param {Set<string>} namesInRecursed
128140
@param {'forward' | 'backward'} [direction]
129141
@returns {string}
130142
*/
131-
function repeatWithDepth(expression, reps, direction = 'forward') {
143+
function repeatWithDepth(expression, reps, namesInRecursed, direction = 'forward') {
132144
const startNum = 2;
133145
const depthNum = i => direction === 'backward' ? reps - i + startNum - 1 : i + startNum;
134146
let result = '';
135147
for (let i = 0; i < reps; i++) {
136148
const captureNum = depthNum(i);
137149
result += replaceUnescaped(
138150
expression,
139-
// FIXME: Don't change named backrefs that refer outside of the expression being recursed
140151
String.raw`${namedCapturingDelim}|\\k<(?<backref>[^>]+)>`,
141-
({groups: {captureName, backref}}) => {
152+
({0: m, groups: {captureName, backref}}) => {
153+
if (backref && !namesInRecursed.has(backref)) {
154+
return m;
155+
}
142156
const suffix = `_$${captureNum}`;
143157
return captureName ? `(?<${captureName}${suffix}>` : `\\k<${backref}${suffix}>`;
144158
},

0 commit comments

Comments
 (0)