Skip to content

Revert "[Regex] Switch regex match to Swift tuples." #41325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions include/swift/AST/ASTContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,12 @@ class ASTContext final {
/// i.e. true if the entry is [key: alias_name, value: (real_name, true)].
mutable llvm::DenseMap<Identifier, std::pair<Identifier, bool>> ModuleAliasMap;

/// The maximum arity of `_StringProcessing.Tuple{n}`.
static constexpr unsigned StringProcessingTupleDeclMaxArity = 8;
/// Cached `_StringProcessing.Tuple{n}` declarations, indexed by `arity - 2`.
///
/// The cache is filled lazily with one slot per arity in
/// [2, StringProcessingTupleDeclMaxArity], i.e. `MaxArity - 1` slots, so the
/// inline capacity is sized to hold all of them without a heap allocation.
mutable SmallVector<StructDecl *, StringProcessingTupleDeclMaxArity - 1>
StringProcessingTupleDecls;

/// Retrieve the allocator for the given arena.
llvm::BumpPtrAllocator &
getAllocator(AllocationArena arena = AllocationArena::Permanent) const;
Expand Down Expand Up @@ -623,6 +629,14 @@ class ASTContext final {
/// Retrieve _StringProcessing.Regex.init(_regexString: String, version: Int).
ConcreteDeclRef getRegexInitDecl(Type regexType) const;

/// Retrieve the max arity that `_StringProcessing.Tuple{arity}` was
/// instantiated for.
unsigned getStringProcessingTupleDeclMaxArity() const;

/// Retrieve the `_StringProcessing.Tuple{arity}` declaration for the given
/// arity.
StructDecl *getStringProcessingTupleDecl(unsigned arity) const;

/// Retrieve the declaration of Swift.<(Int, Int) -> Bool.
FuncDecl *getLessThanIntDecl() const;

Expand Down
3 changes: 3 additions & 0 deletions include/swift/AST/DiagnosticsSema.def
Original file line number Diff line number Diff line change
Expand Up @@ -4832,6 +4832,9 @@ ERROR(string_processing_lib_missing,none,
ERROR(regex_capture_types_failed_to_decode,none,
"failed to decode capture types for regular expression literal; this may "
"be a compiler bug", ())
ERROR(regex_too_many_captures,none,
"too many captures in regular expression literal; the current limit is "
"%0", (unsigned))

//------------------------------------------------------------------------------
// MARK: Type Check Types
Expand Down
1 change: 1 addition & 0 deletions include/swift/AST/KnownSDKTypes.def
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,6 @@ KNOWN_SDK_TYPE_DECL(Distributed, RemoteCallTarget, StructDecl, 0)

// String processing
KNOWN_SDK_TYPE_DECL(StringProcessing, Regex, StructDecl, 1)
KNOWN_SDK_TYPE_DECL(StringProcessing, DynamicCaptures, EnumDecl, 0)

#undef KNOWN_SDK_TYPE_DECL
23 changes: 23 additions & 0 deletions lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,29 @@ ConcreteDeclRef ASTContext::getRegexInitDecl(Type regexType) const {
return ConcreteDeclRef(foundDecl, subs);
}

/// Returns the largest arity for which a `_StringProcessing.Tuple{n}`
/// declaration can be requested, i.e. the class constant
/// `StringProcessingTupleDeclMaxArity`.
unsigned ASTContext::getStringProcessingTupleDeclMaxArity() const {
return StringProcessingTupleDeclMaxArity;
}

/// Look up, and cache, the `_StringProcessing.Tuple{arity}` struct
/// declaration.
///
/// \param arity Number of generic arguments of the requested tuple struct.
///        Expected to be at least 2.
/// \returns the `Tuple{arity}` declaration, or nullptr when \p arity is
///          outside [2, StringProcessingTupleDeclMaxArity], when the
///          `_StringProcessing` module is not loaded, or when the qualified
///          lookup does not yield exactly one type declaration.
StructDecl *ASTContext::getStringProcessingTupleDecl(unsigned arity) const {
  assert(arity >= 2);
  // In no-assert builds an arity below 2 would wrap around in `arity - 2`
  // and index far outside the cache, so reject it explicitly.
  if (arity < 2 || arity > StringProcessingTupleDeclMaxArity)
    return nullptr;

  // Lazily create one (initially empty) cache slot per supported arity.
  if (StringProcessingTupleDecls.empty())
    StringProcessingTupleDecls.append(
        StringProcessingTupleDeclMaxArity - 1, nullptr);

  auto &decl = StringProcessingTupleDecls[arity - 2];
  if (decl)
    return decl;

  // The module has to be loaded before its `Tuple{n}` types can be found.
  auto *spModule = getLoadedModule(Id_StringProcessing);
  assert(spModule && "_StringProcessing module is not loaded");
  if (!spModule)
    return nullptr;

  SmallVector<ValueDecl *, 1> results;
  auto typeName = getIdentifier("Tuple" + llvm::utostr(arity));
  spModule->lookupQualified(
      spModule, DeclNameRef(typeName), NL_OnlyTypes, results);
  assert(results.size() == 1);
  // Never read `results[0]` unless the lookup produced exactly one match.
  if (results.size() != 1)
    return nullptr;

  // Store the result in the cache slot and return it.
  return (decl = cast<StructDecl>(results[0]));
}

static
FuncDecl *getBinaryComparisonOperatorIntDecl(const ASTContext &C, StringRef op,
FuncDecl *&cached) {
Expand Down
19 changes: 13 additions & 6 deletions lib/Sema/CSGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1267,20 +1267,27 @@ namespace {
ctx.Id_Regex.str());
return Type();
}
SmallVector<TupleTypeElt, 4> matchElements {ctx.getSubstringType()};
SmallVector<Type, 4> matchTypes {ctx.getSubstringType()};
if (decodeRegexCaptureTypes(ctx,
E->getSerializedCaptureStructure(),
/*atomType*/ ctx.getSubstringType(),
matchElements)) {
matchTypes)) {
ctx.Diags.diagnose(E->getLoc(),
diag::regex_capture_types_failed_to_decode);
return Type();
}
if (matchElements.size() == 1)
if (matchTypes.size() == 1)
return BoundGenericStructType::get(
regexDecl, Type(), matchElements.front().getType());
// Form a tuple.
auto matchType = TupleType::get(matchElements, ctx);
regexDecl, Type(), matchTypes.front());
// Form a `_StringProcessing.Tuple{n}<...>`.
auto *tupleDecl = ctx.getStringProcessingTupleDecl(matchTypes.size());
if (!tupleDecl) {
ctx.Diags.diagnose(E->getLoc(), diag::regex_too_many_captures,
ctx.getStringProcessingTupleDeclMaxArity() - 1);
return Type();
}
auto matchType = BoundGenericStructType::get(
tupleDecl, Type(), matchTypes);
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
}

Expand Down
26 changes: 15 additions & 11 deletions lib/Sema/TypeCheckRegex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ using namespace swift;
bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
ArrayRef<uint8_t> serialization,
Type atomType,
SmallVectorImpl<TupleTypeElt> &result) {
SmallVectorImpl<Type> &result) {
using Version = RegexLiteralExpr::CaptureStructureSerializationVersion;
static const Version implVersion = 1;
unsigned size = serialization.size();
Expand All @@ -46,7 +46,7 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (version != implVersion)
return true;
// Read contents.
SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
SmallVector<SmallVector<Type, 4>, 4> scopes(1);
unsigned offset = sizeof(Version);
auto consumeCode = [&]() -> Optional<RegexCaptureStructureCode> {
auto rawValue = serialization[offset];
Expand All @@ -73,29 +73,33 @@ bool swift::decodeRegexCaptureTypes(ASTContext &ctx,
if (length >= size - offset)
return true; // Unterminated string.
StringRef name(namePtr, length);
scopes.back().push_back(
TupleTypeElt(atomType, ctx.getIdentifier(name)));
// The name is currently unused because we are forming a nominal
// `Tuple{n}` type. We will switch back to native tuples once variadic
// generics are available.
(void)name;
scopes.back().push_back(atomType);
offset += length + /*NUL*/ 1;
break;
}
case RegexCaptureStructureCode::FormArray: {
auto &element = scopes.back().back();
element = TupleTypeElt(ArraySliceType::get(element.getType()),
element.getName());
auto &type = scopes.back().back();
type = ArraySliceType::get(type);
break;
}
case RegexCaptureStructureCode::FormOptional: {
auto &element = scopes.back().back();
element = TupleTypeElt(OptionalType::get(element.getType()),
element.getName());
auto &type = scopes.back().back();
type = OptionalType::get(type);
break;
}
case RegexCaptureStructureCode::BeginTuple:
scopes.push_back({});
break;
case RegexCaptureStructureCode::EndTuple: {
auto children = scopes.pop_back_val();
auto type = TupleType::get(children, ctx);
if (children.size() > ctx.getStringProcessingTupleDeclMaxArity())
return true;
auto tupleDecl = ctx.getStringProcessingTupleDecl(children.size());
auto type = BoundGenericStructType::get(tupleDecl, Type(), children);
scopes.back().push_back(type);
break;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Sema/TypeCheckRegex.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ enum class RegexCaptureStructureCode: uint8_t {
bool decodeRegexCaptureTypes(ASTContext &ctx,
llvm::ArrayRef<uint8_t> serialization,
Type atomType,
llvm::SmallVectorImpl<TupleTypeElt> &result);
llvm::SmallVectorImpl<Type> &result);

} // end namespace swift

Expand Down
3 changes: 1 addition & 2 deletions test/StringProcessing/Parse/regex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
_ = '/abc/'

_ = ('/[*/', '/+]/', '/.]/')
// expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}}
// expected-error@-2 {{cannot parse regular expression: expected ']'}}
// expected-error@-1 {{cannot parse regular expression}}

_ = '/\w+/'
_ = '/\'\\/'
2 changes: 1 addition & 1 deletion test/StringProcessing/Runtime/regex_basic.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ RegexBasicTests.test("Captures") {
"""
let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/'
// Test inferred type.
let _: Regex<(Substring, Substring, Substring?, Substring)>.Type
let _: Regex<Tuple4<Substring, Substring, Substring?, Substring>>.Type
= type(of: regex)
let match1 = input.expectMatch(regex)
expectEqual(input[...], input[match1.range])
Expand Down
4 changes: 2 additions & 2 deletions test/StringProcessing/SILGen/regex_literal_silgen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ var s = '/abc/'
// CHECK: [[INT_INIT:%[0-9]+]] = function_ref @$sSi22_builtinIntegerLiteralSiBI_tcfC : $@convention(method) (Builtin.IntLiteral, @thin Int.Type) -> Int
// CHECK: [[VERSION_INT:%[0-9]+]] = apply [[INT_INIT]]([[VERSION_LITERAL]]

// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA07versionACyxGSS_SitcfC : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<{{.+}}>({{%.+}}, [[REGEX_STR]], [[VERSION_INT]], {{%.+}}) : $@convention(method) <τ_0_0 where τ_0_0 : MatchProtocol> (@owned String, Int, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
32 changes: 17 additions & 15 deletions test/StringProcessing/Sema/regex_literal_type_inference.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,41 @@ takesRegex('//') // okay
let r1 = '/.(.)/'
// Note: We test its type with a separate statement so that we know the type
// checker inferred the regex's type independently without contextual types.
let _: Regex<(Substring, Substring)>.Type = type(of: r1)
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r1)

struct S {}
// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}}
struct S: MatchProtocol {
typealias Capture = Substring
}
// expected-error @+2 {{cannot assign value of type 'Regex<Tuple2<Substring, Substring>>' to type 'Regex<S>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('Tuple2<Substring, Substring>' and 'S') are expected to be equal}}
let r2: Regex<S> = '/.(.)/'

let r3 = '/(.)(.)/'
let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3)
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r3)

let r4 = '/(?<label>.)(.)/'
let _: Regex<(Substring, label: Substring, Substring)>.Type = type(of: r4)
let _: Regex<Tuple3<Substring, Substring, Substring>>.Type = type(of: r4)

let r5 = '/(.(.(.)))/'
let _: Regex<(Substring, Substring, Substring, Substring)>.Type = type(of: r5)
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r5)

let r6 = '/(?'we'.(?'are'.(?'regex'.)+)?)/'
let _: Regex<(Substring, we: Substring, are: Substring?, regex: [Substring]?)>.Type = type(of: r6)
let r6 = '/(?'we'.(?'are'.(?'regex'.)))/'
let _: Regex<Tuple4<Substring, Substring, Substring, Substring>>.Type = type(of: r6)

let r7 = '/(?:(?:(.(.(.)*)?))*?)?/'
// ^ 1
// ^ 2
// ^ 3
let _: Regex<(Substring, [Substring]?, [Substring?]?, [[Substring]?]?)>.Type = type(of: r7)
let _: Regex<Tuple4<Substring, [Substring]?, [Substring?]?, [[Substring]?]?>>.Type = type(of: r7)

let r8 = '/well(?<theres_no_single_element_tuple_what_can_we>do)/'
let _: Regex<(Substring, theres_no_single_element_tuple_what_can_we: Substring)>.Type = type(of: r8)
let _: Regex<Tuple2<Substring, Substring>>.Type = type(of: r8)

let r9 = '/(a)|(b)|(c)|d/'
let _: Regex<(Substring, Substring?, Substring?, Substring?)>.Type = type(of: r9)
let _: Regex<Tuple4<Substring, Substring?, Substring?, Substring?>>.Type = type(of: r9)

let r10 = '/(a)|b/'
let _: Regex<(Substring, Substring?)>.Type = type(of: r10)
let _: Regex<Tuple2<Substring, Substring?>>.Type = type(of: r10)

let r11 = '/()()()()()()()()/'
let _: Regex<(Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring, Substring)>.Type = type(of: r11)
// expected-error @+1 {{too many captures in regular expression literal; the current limit is 7}}
let r11 = '/()()()()()()()()/' // 8 captures, too many for our prototype
6 changes: 3 additions & 3 deletions utils/update_checkout/update-checkout-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
"swift-cmark-gfm": "gfm",
"swift-nio": "2.31.2",
"swift-nio-ssl": "2.15.0",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/5"
}
},
"rebranch": {
Expand Down Expand Up @@ -157,7 +157,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/5"
}
},
"release/5.6": {
Expand Down Expand Up @@ -308,7 +308,7 @@
"sourcekit-lsp": "main",
"swift-format": "main",
"swift-installer-scripts": "main",
"swift-experimental-string-processing": "dev/6"
"swift-experimental-string-processing": "dev/5"
}
},
"release/5.4": {
Expand Down