Skip to content

Commit 3efcebf

Browse files
authored
Merge pull request #41325 from apple/revert-41275-regex-update-dev-6
Revert "[Regex] Switch regex match to Swift tuples."
2 parents 30e2945 + dcd9e8e commit 3efcebf

File tree

12 files changed

+94
-41
lines changed

12 files changed

+94
-41
lines changed

include/swift/AST/ASTContext.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,12 @@ class ASTContext final {
367367
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
368368
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;
369369

370+
/// The maximum arity of `_StringProcessing.Tuple{n}`.
371+
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
372+
/// Cached `_StringProcessing.Tuple{n}` declarations.
373+
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 2>
374+
StringProcessingTupleDecls;
375+
370376
/// Retrieve the allocator for the given arena.
371377
llvm::BumpPtrAllocator &
372378
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
@@ -623,6 +629,14 @@ class ASTContext final {
623629
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
624630
ConcreteDeclRef getRegexInitDecl(Type regexType) const;
625631

632+
/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
633+
/// instantiated for.
634+
unsigned getStringProcessingTupleDeclMaxArity() const;
635+
636+
/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
637+
/// arity.
638+
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;
639+
626640
/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
627641
FuncDecl *getLessThanIntDecl() const;
628642

include/swift/AST/DiagnosticsSema.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4832,6 +4832,9 @@ ERROR(string_processing_lib_missing,none,
48324832
ERROR(regex_capture_types_failed_to_decode,none,
48334833
"failed to decode capture types for regular expression literal; this may "
48344834
"be a compiler bug", ())
4835+
ERROR(regex_too_many_captures,none,
4836+
"too many captures in regular expression literal; the current limit is "
4837+
"%0", (unsigned))
48354838

48364839
//------------------------------------------------------------------------------
48374840
// MARK: Type Check Types

include/swift/AST/KnownSDKTypes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,6 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)
4949

5050
// String processing
5151
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
52+
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)
5253

5354
#undef KNOWN_SDK_TYPE_DECL

lib/AST/ASTContext.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,29 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
12251225
return ConcreteDeclRef(foundDecl, subs);
12261226
}
12271227

1228+
/// Returns the compile-time constant `StringProcessingTupleDeclMaxArity`,
/// i.e. the largest `n` for which a `_StringProcessing.Tuple{n}` declaration
/// can be requested from `getStringProcessingTupleDecl`.
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
1229+
return StringProcessingTupleDeclMaxArity;
1230+
}
1231+
1232+
/// Looks up, and caches, the struct declaration named `Tuple{arity}` in the
/// loaded `_StringProcessing` module.
///
/// \param arity Number of tuple elements; asserted to be at least 2.
/// \returns The cached or freshly looked-up `StructDecl`, or `nullptr` when
///          \p arity exceeds `StringProcessingTupleDeclMaxArity`.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
1233+
// Precondition: the cache below is indexed by `arity - 2`, so arities 0 and 1
// have no slot.
assert(arity >= 2);
1234+
// Arities beyond the supported maximum are reported to the caller as null
// rather than asserted.
if (arity > StringProcessingTupleDeclMaxArity)
1235+
return nullptr;
1236+
// Lazily size the cache on first use: one null slot per arity in
// [2, StringProcessingTupleDeclMaxArity], i.e. MaxArity - 1 slots.
// NOTE(review): the vector's declared inline capacity is MaxArity - 2, one
// less than the number of slots appended here — confirm whether intended.
if (StringProcessingTupleDecls.empty())
1237+
StringProcessingTupleDecls.append(
1238+
StringProcessingTupleDeclMaxArity - 1, nullptr);
1239+
// Reference into the cache so the lookup result can be stored in place.
auto &decl = StringProcessingTupleDecls[arity - 2];
1240+
if (decl)
1241+
return decl;
1242+
SmallVector<ValueDecl *, 1> results;
1243+
// NOTE(review): `spModule` is dereferenced below without a null check;
// presumably callers guarantee the module is already loaded — confirm.
auto *spModule = getLoadedModule(Id_StringProcessing);
1244+
// Build the type name "Tuple<arity>", e.g. "Tuple4".
auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
1245+
// Qualified lookup restricted to type declarations.
spModule->lookupQualified(
1246+
spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
1247+
// NOTE(review): exactly one match is expected. This is only checked by an
// assert, so `results[0]` below is unguarded when asserts are compiled out.
assert(results.size() == 1);
1248+
// Store the declaration in the cache slot and return it.
return (decl = cast<StructDecl>(results[0]));
1249+
}
1250+
12281251
static
12291252
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
12301253
FuncDecl *&cached) {

lib/Sema/CSGen.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,20 +1267,27 @@ namespace {
12671267
ctx.Id_Regex.str());
12681268
return Type();
12691269
}
1270-
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
1270+
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
12711271
if (decodeRegexCaptureTypes(ctx,
12721272
E->getSerializedCaptureStructure(),
12731273
/*atomType*/ ctx.getSubstringType(),
1274-
matchElements)) {
1274+
matchTypes)) {
12751275
ctx.Diags.diagnose(E->getLoc(),
12761276
diag::regex_capture_types_failed_to_decode);
12771277
return Type();
12781278
}
1279-
if (matchElements.size() == 1)
1279+
if (matchTypes.size() == 1)
12801280
return BoundGenericStructType::get(
1281-
regexDecl, Type(), matchElements.front().getType());
1282-
// Form a tuple.
1283-
auto matchType = TupleType::get(matchElements, ctx);
1281+
regexDecl, Type(), matchTypes.front());
1282+
// Form a `_StringProcessing.Tuple{n}<...>`.
1283+
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
1284+
if (!tupleDecl) {
1285+
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
1286+
ctx.getStringProcessingTupleDeclMaxArity() - 1);
1287+
return Type();
1288+
}
1289+
auto matchType = BoundGenericStructType::get(
1290+
tupleDecl, Type(), matchTypes);
12841291
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
12851292
}
12861293

lib/Sema/TypeCheckRegex.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ using namespace swift;
3333
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
3434
ArrayRef<uint8_t> serialization,
3535
Type atomType,
36-
SmallVectorImpl<TupleTypeElt> &result) {
36+
SmallVectorImpl<Type> &result) {
3737
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
3838
static const Version implVersion = 1;
3939
unsigned size = serialization.size();
@@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
4646
if (version != implVersion)
4747
return true;
4848
// Read contents.
49-
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
49+
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
5050
unsigned offset = sizeof(Version);
5151
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
5252
auto rawValue = serialization[offset];
@@ -73,29 +73,33 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
7373
if (length >= size - offset)
7474
return true; // Unterminated string.
7575
StringRef name(namePtr, length);
76-
scopes.back().push_back(
77-
TupleTypeElt(atomType, ctx.getIdentifier(name)));
76+
// The name is currently unused because we are forming a nominal
77+
// `Tuple{n}` type. We will switch back to native tuples when there is
78+
// variadic generics.
79+
(void)name;
80+
scopes.back().push_back(atomType);
7881
offset += length + /*NUL*/ 1;
7982
break;
8083
}
8184
case RegexCaptureStructureCode::FormArray: {
82-
auto &element = scopes.back().back();
83-
element = TupleTypeElt(ArraySliceType::get(element.getType()),
84-
element.getName());
85+
auto &type = scopes.back().back();
86+
type = ArraySliceType::get(type);
8587
break;
8688
}
8789
case RegexCaptureStructureCode::FormOptional: {
88-
auto &element = scopes.back().back();
89-
element = TupleTypeElt(OptionalType::get(element.getType()),
90-
element.getName());
90+
auto &type = scopes.back().back();
91+
type = OptionalType::get(type);
9192
break;
9293
}
9394
case RegexCaptureStructureCode::BeginTuple:
9495
scopes.push_back({});
9596
break;
9697
case RegexCaptureStructureCode::EndTuple: {
9798
auto children = scopes.pop_back_val();
98-
auto type = TupleType::get(children, ctx);
99+
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
100+
return true;
101+
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
102+
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
99103
scopes.back().push_back(type);
100104
break;
101105
}

lib/Sema/TypeCheckRegex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
4040
bool decodeRegexCaptureTypes(ASTContext &ctx,
4141
llvm::ArrayRef<uint8_t> serialization,
4242
Type atomType,
43-
llvm::SmallVectorImpl<TupleTypeElt> &result);
43+
llvm::SmallVectorImpl<Type> &result);
4444

4545
} // end namespace swift
4646

test/StringProcessing/Parse/regex.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
_ = '/abc/'
55

66
_ = ('/[*/', '/+]/', '/.]/')
7-
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
8-
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
7+
// expected-error@-1 {{cannot parse regular expression}}
98

109
_ = '/\w+/'
1110
_ = '/\'\\/'

test/StringProcessing/Runtime/regex_basic.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
4747
"""
4848
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
4949
// Test inferred type.
50-
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
50+
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
5151
= type(of: regex)
5252
let match1 = input.expectMatch(regex)
5353
expectEqual(input[...], input[match1.range])

test/StringProcessing/SILGen/regex_literal_silgen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ var s = '/abc/'
1010
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
1111
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]
1212

13-
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14-
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
13+
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
14+
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>

0 commit comments

Comments
 (0)