diff --git a/include/swift/AST/ASTContext.h b/include/swift/AST/ASTContext.h index 5d44d263cc3e9..d257906458dd9 100644 --- a/include/swift/AST/ASTContext.h +++ b/include/swift/AST/ASTContext.h @@ -367,6 +367,12 @@ class ASTContext final { /// i.e. true if the entry is [key: alias_name, value: (real_name, true)]. mutable llvm::DenseMap> ModuleAliasMap; + /// The maximum arity of `_StringProcessing.Tuple{n}`. + static constexpr unsigned StringProcessingTupleDeclMaxArity = 8; + /// Cached `_StringProcessing.Tuple{n}` declarations. + mutable SmallVector + StringProcessingTupleDecls; + /// Retrieve the allocator for the given arena. llvm::BumpPtrAllocator & getAllocator(AllocationArena arena = AllocationArena::Permanent) const; @@ -623,6 +629,14 @@ class ASTContext final { /// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int). ConcreteDeclRef getRegexInitDecl(Type regexType) const; + /// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was + /// instantiated for. + unsigned getStringProcessingTupleDeclMaxArity() const; + + /// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given + /// arity. + StructDecl *getStringProcessingTupleDecl(unsigned arity) const; + /// Retrieve the declaration of Swift.<(Int, Int) -> Bool. 
FuncDecl *getLessThanIntDecl() const; diff --git a/include/swift/AST/DiagnosticsSema.def b/include/swift/AST/DiagnosticsSema.def index 2235371d4c39a..6287551fbbe4c 100644 --- a/include/swift/AST/DiagnosticsSema.def +++ b/include/swift/AST/DiagnosticsSema.def @@ -4832,6 +4832,9 @@ ERROR(string_processing_lib_missing,none, ERROR(regex_capture_types_failed_to_decode,none, "failed to decode capture types for regular expression literal; this may " "be a compiler bug", ()) +ERROR(regex_too_many_captures,none, + "too many captures in regular expression literal; the current limit is " + "%0", (unsigned)) //------------------------------------------------------------------------------ // MARK: Type Check Types diff --git a/include/swift/AST/KnownSDKTypes.def b/include/swift/AST/KnownSDKTypes.def index 449e4cecc3dc9..ec83ad0fa27e2 100644 --- a/include/swift/AST/KnownSDKTypes.def +++ b/include/swift/AST/KnownSDKTypes.def @@ -49,5 +49,6 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0) // String processing KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1) +KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0) #undef KNOWN_SDK_TYPE_DECL diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 0fed20ea24eeb..5e912c3d49bc2 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -1225,6 +1225,29 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const { return ConcreteDeclRef(foundDecl, subs); } +unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const { + return StringProcessingTupleDeclMaxArity; +} + +StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const { + assert(arity >= 2); + if (arity > StringProcessingTupleDeclMaxArity) + return nullptr; + if (StringProcessingTupleDecls.empty()) + StringProcessingTupleDecls.append( + StringProcessingTupleDeclMaxArity - 1, nullptr); + auto &decl = StringProcessingTupleDecls[arity - 2]; + if (decl) + return decl; + SmallVector results; 
+ auto *spModule = getLoadedModule(Id_StringProcessing); + auto typeName = getIdentifier("Tuple" + llvm::utostr(arity)); + spModule->lookupQualified( + spModule, DeclNameRef(typeName), NL_OnlyTypes, results); + assert(results.size() == 1); + return (decl = cast(results[0])); +} + static FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op, FuncDecl *&cached) { diff --git a/lib/Sema/CSGen.cpp b/lib/Sema/CSGen.cpp index 9d292b284a53b..e06822940d3bb 100644 --- a/lib/Sema/CSGen.cpp +++ b/lib/Sema/CSGen.cpp @@ -1267,20 +1267,27 @@ namespace { ctx.Id_Regex.str()); return Type(); } - SmallVector matchElements {ctx.getSubstringType()}; + SmallVector matchTypes {ctx.getSubstringType()}; if (decodeRegexCaptureTypes(ctx, E->getSerializedCaptureStructure(), /*atomType*/ ctx.getSubstringType(), - matchElements)) { + matchTypes)) { ctx.Diags.diagnose(E->getLoc(), diag::regex_capture_types_failed_to_decode); return Type(); } - if (matchElements.size() == 1) + if (matchTypes.size() == 1) return BoundGenericStructType::get( - regexDecl, Type(), matchElements.front().getType()); - // Form a tuple. - auto matchType = TupleType::get(matchElements, ctx); + regexDecl, Type(), matchTypes.front()); + // Form a `_StringProcessing.Tuple{n}<...>`. 
+ auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size()); + if (!tupleDecl) { + ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures, + ctx.getStringProcessingTupleDeclMaxArity() - 1); + return Type(); + } + auto matchType = BoundGenericStructType::get( + tupleDecl, Type(), matchTypes); return BoundGenericStructType::get(regexDecl, Type(), {matchType}); } diff --git a/lib/Sema/TypeCheckRegex.cpp b/lib/Sema/TypeCheckRegex.cpp index d8f86b3d8d8e7..a19483447fdf3 100644 --- a/lib/Sema/TypeCheckRegex.cpp +++ b/lib/Sema/TypeCheckRegex.cpp @@ -33,7 +33,7 @@ using namespace swift; bool swift::decodeRegexCaptureTypes(ASTContext &ctx, ArrayRef serialization, Type atomType, - SmallVectorImpl &result) { + SmallVectorImpl &result) { using Version = RegexLiteralExpr::CaptureStructureSerializationVersion; static const Version implVersion = 1; unsigned size = serialization.size(); @@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx, if (version != implVersion) return true; // Read contents. - SmallVector, 4> scopes(1); + SmallVector, 4> scopes(1); unsigned offset = sizeof(Version); auto consumeCode = [&]() -> Optional { auto rawValue = serialization[offset]; @@ -73,21 +73,22 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx, if (length >= size - offset) return true; // Unterminated string. StringRef name(namePtr, length); - scopes.back().push_back( - TupleTypeElt(atomType, ctx.getIdentifier(name))); + // The name is currently unused because we are forming a nominal + // `Tuple{n}` type. We will switch back to native tuples when there are + // variadic generics. 
+ (void)name; + scopes.back().push_back(atomType); offset += length + /*NUL*/ 1; break; } case RegexCaptureStructureCode::FormArray: { - auto &element = scopes.back().back(); - element = TupleTypeElt(ArraySliceType::get(element.getType()), - element.getName()); + auto &type = scopes.back().back(); + type = ArraySliceType::get(type); break; } case RegexCaptureStructureCode::FormOptional: { - auto &element = scopes.back().back(); - element = TupleTypeElt(OptionalType::get(element.getType()), - element.getName()); + auto &type = scopes.back().back(); + type = OptionalType::get(type); break; } case RegexCaptureStructureCode::BeginTuple: @@ -95,7 +96,14 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx, break; case RegexCaptureStructureCode::EndTuple: { auto children = scopes.pop_back_val(); - auto type = TupleType::get(children, ctx); + // `getStringProcessingTupleDecl` asserts `arity >= 2` and indexes its cache + // with `arity - 2`, so treat a nested tuple with fewer than two children (or + // more than the maximum arity) as a decoding failure instead of calling it. + if (children.size() < 2 || + children.size() > ctx.getStringProcessingTupleDeclMaxArity()) + return true; + auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size()); + auto type = BoundGenericStructType::get(tupleDecl, Type(), children); scopes.back().push_back(type); break; } diff --git a/lib/Sema/TypeCheckRegex.h b/lib/Sema/TypeCheckRegex.h index 8d189ec051570..4142922152a76 100644 --- a/lib/Sema/TypeCheckRegex.h +++ b/lib/Sema/TypeCheckRegex.h @@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t { bool decodeRegexCaptureTypes(ASTContext &ctx, llvm::ArrayRef serialization, Type atomType, - llvm::SmallVectorImpl &result); + llvm::SmallVectorImpl &result); } // end namespace swift diff --git a/test/StringProcessing/Parse/regex.swift b/test/StringProcessing/Parse/regex.swift index ff699d098c89e..bbdea43cd1b6d 100644 --- a/test/StringProcessing/Parse/regex.swift +++ b/test/StringProcessing/Parse/regex.swift @@ -4,8 +4,7 @@ _ = '/abc/' _ = ('/[*/', '/+]/', '/.]/') -// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}} -// expected-error@-2 {{cannot parse regular expression: expected ']'}} 
+// expected-error@-1 {{cannot parse regular expression}} _ = '/\w+/' _ = '/\'\\/' diff --git a/test/StringProcessing/Runtime/regex_basic.swift b/test/StringProcessing/Runtime/regex_basic.swift index 856ce1295a033..2a392e7ec8d07 100644 --- a/test/StringProcessing/Runtime/regex_basic.swift +++ b/test/StringProcessing/Runtime/regex_basic.swift @@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") { """ let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/' // Test inferred type. - let _: Regex<(Substring, Substring, Substring?, Substring)>.Type + let _: Regex>.Type = type(of: regex) let match1 = input.expectMatch(regex) expectEqual(input[...], input[match1.range]) diff --git a/test/StringProcessing/SILGen/regex_literal_silgen.swift b/test/StringProcessing/SILGen/regex_literal_silgen.swift index b96cfabc3173a..217f7caa9d8bd 100644 --- a/test/StringProcessing/SILGen/regex_literal_silgen.swift +++ b/test/StringProcessing/SILGen/regex_literal_silgen.swift @@ -10,5 +10,5 @@ var s = '/abc/' // CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int // CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]] -// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0> -// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0> +// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0> +// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : 
MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0> diff --git a/test/StringProcessing/Sema/regex_literal_type_inference.swift b/test/StringProcessing/Sema/regex_literal_type_inference.swift index ebceb1897d209..5ffb20e615bcd 100644 --- a/test/StringProcessing/Sema/regex_literal_type_inference.swift +++ b/test/StringProcessing/Sema/regex_literal_type_inference.swift @@ -10,39 +10,41 @@ takesRegex('//') // okay let r1 = '/.(.)/' // Note: We test its type with a separate statement so that we know the type // checker inferred the regex's type independently without contextual types. -let _: Regex<(Substring, Substring)>.Type = type(of: r1) +let _: Regex>.Type = type(of: r1) -struct S {} -// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex'}} -// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}} +struct S: MatchProtocol { + typealias Capture = Substring +} +// expected-error @+2 {{cannot assign value of type 'Regex>' to type 'Regex'}} +// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2' and 'S') are expected to be equal}} let r2: Regex = '/.(.)/' let r3 = '/(.)(.)/' -let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3) +let _: Regex>.Type = type(of: r3) let r4 = '/(?