Skip to content

Commit c44a057

Browse files
Type Inference for Regular Expressions
1 parent cd6c0a0 commit c44a057

File tree

2,474 files changed

+90788
-49543
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

2,474 files changed

+90788
-49543
lines changed

src/compiler/checker.ts

Lines changed: 114 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,10 @@ import {
962962
rangeOfTypeParameters,
963963
ReadonlyKeyword,
964964
reduceLeft,
965+
RegExpAnyString,
966+
RegularExpressionFlags,
965967
RegularExpressionLiteral,
968+
RegularExpressionPatternUnion,
966969
RelationComparisonResult,
967970
relativeComplement,
968971
removeExtension,
@@ -1304,6 +1307,17 @@ const typeofNEFacts: ReadonlyMap<string, TypeFacts> = new Map(Object.entries({
13041307
function: TypeFacts.TypeofNEFunction,
13051308
}));
13061309

1310+
// Maps each `RegularExpressionFlags` member to the name of the property that represents it
// in the flags object type built for a regular expression literal. When that type is built,
// every entry of this map becomes one property, typed `true` if the flag is set on the
// literal and `false` otherwise.
const regExpFlagToPropertyName: ReadonlyMap<RegularExpressionFlags, __String> = new Map([
1311+
[RegularExpressionFlags.HasIndices, "hasIndices" as __String],
1312+
[RegularExpressionFlags.Global, "global" as __String],
1313+
[RegularExpressionFlags.IgnoreCase, "ignoreCase" as __String],
1314+
[RegularExpressionFlags.Multiline, "multiline" as __String],
1315+
[RegularExpressionFlags.DotAll, "dotAll" as __String],
1316+
[RegularExpressionFlags.Unicode, "unicode" as __String],
1317+
[RegularExpressionFlags.UnicodeSets, "unicodeSets" as __String],
1318+
[RegularExpressionFlags.Sticky, "sticky" as __String],
1319+
]);
1320+
13071321
type TypeSystemEntity = Node | Symbol | Type | Signature;
13081322

13091323
const enum TypeSystemPropertyName {
@@ -2087,7 +2101,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
20872101
var stringNumberSymbolType = getUnionType([stringType, numberType, esSymbolType]);
20882102
var numberOrBigIntType = getUnionType([numberType, bigintType]);
20892103
var templateConstraintType = getUnionType([stringType, numberType, booleanType, bigintType, nullType, undefinedType]) as UnionType;
2090-
var numericStringType = getTemplateLiteralType(["", ""], [numberType]); // The `${number}` type
2104+
var numericStringType = getTemplateLiteralType(/*texts*/ undefined, [numberType]); // The `${number}` type
20912105

20922106
var restrictiveMapper: TypeMapper = makeFunctionTypeMapper(t => t.flags & TypeFlags.TypeParameter ? getRestrictiveTypeParameter(t as TypeParameter) : t, () => "(restrictive mapper)");
20932107
var permissiveMapper: TypeMapper = makeFunctionTypeMapper(t => t.flags & TypeFlags.TypeParameter ? wildcardType : t, () => "(permissive mapper)");
@@ -2230,7 +2244,6 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
22302244
var globalStringType: ObjectType;
22312245
var globalNumberType: ObjectType;
22322246
var globalBooleanType: ObjectType;
2233-
var globalRegExpType: ObjectType;
22342247
var globalThisType: GenericType;
22352248
var anyArrayType: Type;
22362249
var autoArrayType: Type;
@@ -2269,6 +2282,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
22692282
var deferredGlobalImportAttributesType: ObjectType | undefined;
22702283
var deferredGlobalDisposableType: ObjectType | undefined;
22712284
var deferredGlobalAsyncDisposableType: ObjectType | undefined;
2285+
var deferredGlobalRegExpSymbol: Symbol | undefined;
22722286
var deferredGlobalExtractSymbol: Symbol | undefined;
22732287
var deferredGlobalOmitSymbol: Symbol | undefined;
22742288
var deferredGlobalAwaitedSymbol: Symbol | undefined;
@@ -16914,6 +16928,12 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
1691416928
return symbol && getTypeOfGlobalSymbol(symbol, arity) as GenericType;
1691516929
}
1691616930

16931+
/**
 * Lazily resolves the global `RegExp` type alias symbol, looked up with arity 3 and with
 * error reporting enabled. The outcome is cached in `deferredGlobalRegExpSymbol`; a failed
 * lookup is cached as `unknownSymbol` so the lookup is not repeated.
 *
 * @returns The resolved symbol, or `undefined` when the alias could not be resolved.
 */
function getGlobalRegExpSymbol(): Symbol | undefined {
16932+
// We always report an error, so cache a result in the event we could not resolve the symbol to prevent reporting it multiple times
16933+
deferredGlobalRegExpSymbol ||= getGlobalTypeAliasSymbol("RegExp" as __String, /*arity*/ 3, /*reportErrors*/ true) || unknownSymbol;
16934+
// `unknownSymbol` is only the cached "not found" marker; callers receive `undefined` instead.
return deferredGlobalRegExpSymbol === unknownSymbol ? undefined : deferredGlobalRegExpSymbol;
16935+
}
16936+
1691716937
function getGlobalExtractSymbol(): Symbol | undefined {
1691816938
// We always report an error, so cache a result in the event we could not resolve the symbol to prevent reporting it multiple times
1691916939
deferredGlobalExtractSymbol ||= getGlobalTypeAliasSymbol("Extract" as __String, /*arity*/ 2, /*reportErrors*/ true) || unknownSymbol;
@@ -18072,8 +18092,11 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
1807218092
return reduceLeft(types, (n, t) => t.flags & TypeFlags.Union ? n * (t as UnionType).types.length : t.flags & TypeFlags.Never ? 0 : n, 1);
1807318093
}
1807418094

18075-
function checkCrossProductUnion(types: readonly Type[]) {
18095+
function checkCrossProductUnion(types: readonly Type[], isRegularExpression?: boolean) {
1807618096
const size = getCrossProductUnionSize(types);
18097+
if (isRegularExpression) {
18098+
return size < 10000;
18099+
}
1807718100
if (size >= 100000) {
1807818101
tracing?.instant(tracing.Phase.CheckTypes, "checkCrossProductUnion_DepthLimit", { typeIds: types.map(t => t.id), size });
1807918102
error(currentNode, Diagnostics.Expression_produces_a_union_type_that_is_too_complex_to_represent);
@@ -18329,19 +18352,19 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
1832918352
return links.resolvedType;
1833018353
}
1833118354

18332-
function getTemplateLiteralType(texts: readonly string[], types: readonly Type[]): Type {
18355+
function getTemplateLiteralType(texts: readonly string[] | undefined, types: readonly Type[], isRegularExpression?: boolean): Type {
1833318356
const unionIndex = findIndex(types, t => !!(t.flags & (TypeFlags.Never | TypeFlags.Union)));
1833418357
if (unionIndex >= 0) {
18335-
return checkCrossProductUnion(types) ?
18336-
mapType(types[unionIndex], t => getTemplateLiteralType(texts, replaceElement(types, unionIndex, t))) :
18337-
errorType;
18358+
return checkCrossProductUnion(types, isRegularExpression) ?
18359+
mapType(types[unionIndex], t => getTemplateLiteralType(texts, replaceElement(types, unionIndex, t), isRegularExpression)) :
18360+
isRegularExpression ? stringType : errorType;
1833818361
}
1833918362
if (contains(types, wildcardType)) {
1834018363
return wildcardType;
1834118364
}
1834218365
const newTypes: Type[] = [];
1834318366
const newTexts: string[] = [];
18344-
let text = texts[0];
18367+
let text = texts ? texts[0] : "";
1834518368
if (!addSpans(texts, types)) {
1834618369
return stringType;
1834718370
}
@@ -18365,22 +18388,27 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
1836518388
}
1836618389
return type;
1836718390

18368-
function addSpans(texts: readonly string[], types: readonly Type[]): boolean {
18391+
function addSpans(texts: readonly string[] | undefined, types: readonly Type[]): boolean {
1836918392
for (let i = 0; i < types.length; i++) {
1837018393
const t = types[i];
1837118394
if (t.flags & (TypeFlags.Literal | TypeFlags.Null | TypeFlags.Undefined)) {
1837218395
text += getTemplateStringForType(t) || "";
18373-
text += texts[i + 1];
18396+
if (texts) text += texts[i + 1];
1837418397
}
1837518398
else if (t.flags & TypeFlags.TemplateLiteral) {
1837618399
text += (t as TemplateLiteralType).texts[0];
1837718400
if (!addSpans((t as TemplateLiteralType).texts, (t as TemplateLiteralType).types)) return false;
18378-
text += texts[i + 1];
18401+
if (texts) text += texts[i + 1];
1837918402
}
1838018403
else if (isGenericIndexType(t) || isPatternLiteralPlaceholderType(t)) {
18381-
newTypes.push(t);
18382-
newTexts.push(text);
18383-
text = texts[i + 1];
18404+
if (!text && lastOrUndefined(newTypes) === stringType && t === stringType) {
18405+
// Quickly collapse consecutive `${string}${string}` for calls from regular expressions
18406+
}
18407+
else {
18408+
newTypes.push(t);
18409+
newTexts.push(text);
18410+
}
18411+
text = texts ? texts[i + 1] : "";
1838418412
}
1838518413
else {
1838618414
return false;
@@ -18413,7 +18441,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
1841318441
type.flags & TypeFlags.StringMapping && symbol === type.symbol ? type :
1841418442
type.flags & (TypeFlags.Any | TypeFlags.String | TypeFlags.StringMapping) || isGenericIndexType(type) ? getStringMappingTypeForGenericType(symbol, type) :
1841518443
// This handles Mapping<`${number}`> and Mapping<`${bigint}`>
18416-
isPatternLiteralPlaceholderType(type) ? getStringMappingTypeForGenericType(symbol, getTemplateLiteralType(["", ""], [type])) :
18444+
isPatternLiteralPlaceholderType(type) ? getStringMappingTypeForGenericType(symbol, getTemplateLiteralType(/*texts*/ undefined, [type])) :
1841718445
type;
1841818446
}
1841918447

@@ -25897,7 +25925,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
2589725925
}
2589825926

2589925927
// Returns `type` unchanged when its flags include `Any` or `StringLike`; otherwise returns the
// template literal type built from `type` as its single placeholder.
// This diff changes only the `texts` argument of that call: `["", ""]` becomes `undefined`.
function getStringLikeTypeForType(type: Type) {
25900-
return type.flags & (TypeFlags.Any | TypeFlags.StringLike) ? type : getTemplateLiteralType(["", ""], [type]);
25928+
return type.flags & (TypeFlags.Any | TypeFlags.StringLike) ? type : getTemplateLiteralType(/*texts*/ undefined, [type]);
2590125929
}
2590225930

2590325931
// This function infers from the text parts and type parts of a source literal to a target template literal. The number
@@ -32398,9 +32426,10 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
3239832426
}
3239932427
}
3240032428

32401-
function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) {
32402-
const sourceFile = getSourceFileOfNode(node);
32403-
if (!hasParseDiagnostics(sourceFile) && !node.isUnterminated) {
32429+
function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
32430+
const regExpTypeAlias = getGlobalRegExpSymbol();
32431+
if (regExpTypeAlias) {
32432+
const sourceFile = getSourceFileOfNode(node);
3240432433
let lastError: DiagnosticWithLocation | undefined;
3240532434
scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true);
3240632435
scanner.setScriptTarget(sourceFile.languageVersion);
@@ -32421,23 +32450,79 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
3242132450
try {
3242232451
scanner.scan();
3242332452
Debug.assert(scanner.reScanSlashToken(/*reportErrors*/ true) === SyntaxKind.RegularExpressionLiteral, "Expected scanner to rescan RegularExpressionLiteral");
32424-
return !!lastError;
3242532453
}
3242632454
finally {
3242732455
scanner.setText("");
3242832456
scanner.setOnError(/*onError*/ undefined);
3242932457
}
32430-
}
32431-
return false;
32432-
}
3243332458

32434-
function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
32435-
const nodeLinks = getNodeLinks(node);
32436-
if (!(nodeLinks.flags & NodeCheckFlags.TypeChecked)) {
32437-
nodeLinks.flags |= NodeCheckFlags.TypeChecked;
32438-
addLazyDiagnostic(() => checkGrammarRegularExpressionLiteral(node));
32459+
const regExpCapturingGroups = scanner.getRegExpCapturingGroups();
32460+
const regExpCapturingGroupSpecifiers = scanner.getRegExpCapturingGroupSpecifiers();
32461+
const patternUnionTypeCache = new WeakMap<RegularExpressionPatternUnion, Type>();
32462+
32463+
const capturingGroupsType = createTupleType(map(regExpCapturingGroups, patternUnion => {
32464+
const patternUnionType = getTypeFromPatternUnion(patternUnion);
32465+
return patternUnion.isPossiblyUndefined ? getUnionType([patternUnionType, undefinedType]) : patternUnionType;
32466+
}));
32467+
32468+
let namedCapturingGroupsType: Type;
32469+
if (regExpCapturingGroupSpecifiers?.size) {
32470+
const namedCapturingGroupsTypeMembers = createSymbolTable();
32471+
for (const [groupName, capturingGroups] of regExpCapturingGroupSpecifiers) {
32472+
const escapedGroupName = escapeLeadingUnderscores(groupName);
32473+
const types = map(capturingGroups, getTypeFromPatternUnion);
32474+
if (some(capturingGroups, patternUnion => patternUnion.isPossiblyUndefined!)) {
32475+
types.push(undefinedType);
32476+
}
32477+
const groupType = getUnionType(types);
32478+
namedCapturingGroupsTypeMembers.set(escapedGroupName, createProperty(escapedGroupName, groupType));
32479+
}
32480+
namedCapturingGroupsType = createAnonymousType(/*symbol*/ undefined, namedCapturingGroupsTypeMembers, emptyArray, emptyArray, emptyArray);
32481+
}
32482+
else {
32483+
namedCapturingGroupsType = undefinedType;
32484+
}
32485+
32486+
const regExpFlags = scanner.getRegExpFlags();
32487+
const flagsTypeMembers = createSymbolTable();
32488+
for (const [flag, propertyName] of regExpFlagToPropertyName) {
32489+
flagsTypeMembers.set(propertyName, createProperty(propertyName, regExpFlags & flag ? trueType : falseType));
32490+
}
32491+
const flagsType = createAnonymousType(/*symbol*/ undefined, flagsTypeMembers, emptyArray, emptyArray, emptyArray);
32492+
32493+
return getTypeAliasInstantiation(regExpTypeAlias, [capturingGroupsType, namedCapturingGroupsType, flagsType]);
32494+
32495+
/**
 * Converts a `RegularExpressionPatternUnion` into a checker `Type`.
 *
 * Each member of the union is converted as follows:
 *  - a plain string becomes a string literal type;
 *  - any other member is treated as a sequence of parts and becomes a template literal type
 *    (built with `isRegularExpression` set to `true`), whose parts are converted in turn.
 *
 * The member types are combined with `getUnionType`. Results are memoized per pattern union
 * object in `patternUnionTypeCache`, so the same object is only converted once.
 */
function getTypeFromPatternUnion(patternUnion: RegularExpressionPatternUnion): Type {
32496+
let patternUnionType = patternUnionTypeCache.get(patternUnion);
32497+
if (!patternUnionType) {
32498+
const types = arrayFrom(patternUnion, pattern => {
32499+
if (typeof pattern === "string") {
32500+
return getStringLiteralType(pattern);
32501+
}
32502+
return getTemplateLiteralType(
32503+
/*texts*/ undefined,
32504+
/*types*/ map(pattern, content => {
32505+
// A literal piece of text in the pattern.
32506+
if (typeof content === "string") {
32506+
return getStringLiteralType(content);
32507+
}
32508+
// The `RegExpAnyString` marker maps to the general `string` type.
if (content === RegExpAnyString) {
32509+
return stringType;
32510+
}
32511+
// A nested pattern union (detected via `instanceof Set`) is converted recursively.
if (content instanceof Set) {
32512+
return getTypeFromPatternUnion(content);
32513+
}
32514+
// No other kind of part is expected here.
Debug.fail();
32515+
}),
32516+
/*isRegularExpression*/ true,
32517+
);
32518+
});
32519+
patternUnionType = getUnionType(types);
32520+
patternUnionTypeCache.set(patternUnion, patternUnionType);
32521+
}
32522+
return patternUnionType;
32523+
}
3243932524
}
32440-
return globalRegExpType;
32525+
return emptyGenericType;
3244132526
}
3244232527

3244332528
function checkSpreadExpression(node: SpreadElement, checkMode?: CheckMode): Type {
@@ -50415,7 +50500,6 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
5041550500
globalStringType = getGlobalType("String" as __String, /*arity*/ 0, /*reportErrors*/ true);
5041650501
globalNumberType = getGlobalType("Number" as __String, /*arity*/ 0, /*reportErrors*/ true);
5041750502
globalBooleanType = getGlobalType("Boolean" as __String, /*arity*/ 0, /*reportErrors*/ true);
50418-
globalRegExpType = getGlobalType("RegExp" as __String, /*arity*/ 0, /*reportErrors*/ true);
5041950503
anyArrayType = createArrayType(anyType);
5042050504

5042150505
autoArrayType = createArrayType(autoType);

src/compiler/core.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,12 @@ export function last<T>(array: readonly T[]): T {
11231123
return array[array.length - 1];
11241124
}
11251125

1126+
/**
 * Overwrites the last element of `array` with `value` and returns `value`.
 * Asserts (via `Debug.assert`) that `array` is not empty.
 *
 * @internal
 */
1127+
export function setLast<T>(array: T[], value: T): T {
1128+
Debug.assert(array.length !== 0);
1129+
return array[array.length - 1] = value;
1130+
}
1131+
11261132
/**
11271133
* Returns the only element of an array if it contains only one element, `undefined` otherwise.
11281134
*

src/compiler/diagnosticMessages.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,10 +1705,6 @@
17051705
"category": "Error",
17061706
"code": 1512
17071707
},
1708-
"Undetermined character escape.": {
1709-
"category": "Error",
1710-
"code": 1513
1711-
},
17121708
"Expected a capturing group name.": {
17131709
"category": "Error",
17141710
"code": 1514
@@ -1834,6 +1830,10 @@
18341830
"category": "Error",
18351831
"code": 1544
18361832
},
1833+
"'\\k' is only available outside character class.": {
1834+
"category": "Error",
1835+
"code": 1545
1836+
},
18371837

18381838
"The types of '{0}' are incompatible between these types.": {
18391839
"category": "Error",

0 commit comments

Comments
 (0)