From 611fd33f58226352412c06f0b122464ef9d75fe9 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Thu, 17 Feb 2022 20:43:17 +0000 Subject: [PATCH] Update regex literal delimiters Update the lexing code for the replacement of the `'/.../'` and `'|...|'` delimiters with `#/.../#` and `#|...|#` respectively, in addition to allowing the `re'...'` delimiter. --- lib/Parse/Lexer.cpp | 28 +++++++++-------- test/StringProcessing/Parse/regex.swift | 22 ++++++++++---- .../Parse/regex_parse_end_of_buffer.swift | 2 +- .../Parse/regex_parse_error.swift | 7 +++-- .../Runtime/regex_basic.swift | 8 ++--- .../SILGen/regex_literal_silgen.swift | 4 +-- .../Sema/regex_literal_type_inference.swift | 30 +++++++++---------- .../update-checkout-config.json | 6 ++-- 8 files changed, 63 insertions(+), 44 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 307051c48a57a..96a3ef892168f 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1959,8 +1959,6 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body, } bool Lexer::tryLexRegexLiteral(const char *TokStart) { - assert(*TokStart == '\''); - // We need to have experimental string processing enabled, and have the // parsing logic for regex literals available. if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn) @@ -1995,7 +1993,6 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { // Otherwise, we either had a successful lex, or something that was // recoverable. - assert(ErrStr || CurPtr[-1] == '\''); formToken(tok::regex_literal, TokStart); return true; } @@ -2471,8 +2468,16 @@ void Lexer::lexImpl() { case '\\': return formToken(tok::backslash, TokStart); case '#': + // Try lex a raw string literal. if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags)) return lexStringLiteral(CustomDelimiterLen); + + // If we have experimental string processing enabled, try lex a regex + // literal. + if (tryLexRegexLiteral(TokStart)) + return; + + // Otherwise try lex a magic pound literal. return lexHash(); // Operator characters. @@ -2525,13 +2530,20 @@ void Lexer::lexImpl() { case '&': case '|': case '^': case '~': case '.': return lexOperatorIdentifier(); + case 'r': + // If we have experimental string processing enabled, try lex a regex + // literal. + if (tryLexRegexLiteral(TokStart)) + return; + LLVM_FALLTHROUGH; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'o': case 'p': case 'q': /*r above*/ case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '_': return lexIdentifier(); @@ -2544,14 +2556,6 @@ void Lexer::lexImpl() { return lexNumber(); case '\'': - // If we have experimental string processing enabled, and have the parsing - // logic for regex literals, try to lex a single quoted string as a regex - // literal. - if (tryLexRegexLiteral(TokStart)) - return; - - // Otherwise lex as a string literal and emit a diagnostic. - LLVM_FALLTHROUGH; case '"': return lexStringLiteral(); diff --git a/test/StringProcessing/Parse/regex.swift b/test/StringProcessing/Parse/regex.swift index ff699d098c89e..c4920ce04bb85 100644 --- a/test/StringProcessing/Parse/regex.swift +++ b/test/StringProcessing/Parse/regex.swift @@ -1,11 +1,23 @@ // RUN: %target-typecheck-verify-swift -enable-experimental-string-processing // REQUIRES: swift_in_compiler -_ = '/abc/' +_ = #/abc/# +_ = #|abc|# +_ = re'abc' -_ = ('/[*/', '/+]/', '/.]/') +func foo(_ x: T...) {} +foo(#/abc/#, #|abc|#, re'abc') + +let arr = [#/abc/#, #|abc|#, re'abc'] + +_ = #/\w+/#.self +_ = #|\w+|#.self +_ = re'\w+'.self + +_ = #/#/\/\#\\/# +_ = #|#|\|\#\\|# +_ = re're\r\e\'\\' + +_ = (#/[*/#, #/+]/#, #/.]/#) // expected-error@-1 {{cannot parse regular expression: quantifier '+' must appear after expression}} // expected-error@-2 {{cannot parse regular expression: expected ']'}} - -_ = '/\w+/' -_ = '/\'\\/' diff --git a/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift b/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift index 715cbeed9c7ca..e1cad9e7ae949 100644 --- a/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift +++ b/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift @@ -3,4 +3,4 @@ // Note there is purposefully no trailing newline here. // expected-error@+1 {{unterminated regex literal}} -var unterminated = '/xy \ No newline at end of file +var unterminated = #/xy \ No newline at end of file diff --git a/test/StringProcessing/Parse/regex_parse_error.swift b/test/StringProcessing/Parse/regex_parse_error.swift index 7265934ae718a..10a7b67fbd567 100644 --- a/test/StringProcessing/Parse/regex_parse_error.swift +++ b/test/StringProcessing/Parse/regex_parse_error.swift @@ -1,7 +1,10 @@ // RUN: %target-typecheck-verify-swift -enable-experimental-string-processing // REQUIRES: swift_in_compiler -let s = '/\\/''/ // expected-error {{unterminated regex literal}} +let s = #/\\/''/ // expected-error {{unterminated regex literal}} +_ = #|\| // expected-error {{unterminated regex literal}} +_ = #// // expected-error {{unterminated regex literal}} +_ = re'x // expected-error {{unterminated regex literal}} // expected-error@+1 {{unterminated regex literal}} -var unterminated = '/xy +var unterminated = #/xy diff --git a/test/StringProcessing/Runtime/regex_basic.swift b/test/StringProcessing/Runtime/regex_basic.swift index 856ce1295a033..fb0ee31d32775 100644 --- a/test/StringProcessing/Runtime/regex_basic.swift +++ b/test/StringProcessing/Runtime/regex_basic.swift @@ -23,11 +23,11 @@ extension String { RegexBasicTests.test("Basic") { let input = "aabccd" - let match1 = input.expectMatch('/aabcc./') + let match1 = input.expectMatch(#/aabcc./#) expectEqual("aabccd", input[match1.range]) expectTrue("aabccd" == match1.match) - let match2 = input.expectMatch('/a*b.+./') + let match2 = input.expectMatch(#/a*b.+./#) expectEqual("aabccd", input[match2.range]) expectTrue("aabccd" == match2.match) } @@ -35,7 +35,7 @@ RegexBasicTests.test("Basic") { RegexBasicTests.test("Modern") { let input = "aabccd" - let match1 = input.expectMatch('|a a bc c /*hello*/ .|') + let match1 = input.expectMatch(#|a a bc c /*hello*/ .|#) expectEqual("aabccd", input[match1.range]) expectTrue("aabccd" == match1.match) } @@ -45,7 +45,7 @@ RegexBasicTests.test("Captures") { A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \ COMBINING MARK TUKWENTIS """ - let regex = '/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/' + let regex = #/([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*/# // Test inferred type. let _: Regex<(Substring, Substring, Substring?, Substring)>.Type = type(of: regex) diff --git a/test/StringProcessing/SILGen/regex_literal_silgen.swift b/test/StringProcessing/SILGen/regex_literal_silgen.swift index b96cfabc3173a..27fb357efeb36 100644 --- a/test/StringProcessing/SILGen/regex_literal_silgen.swift +++ b/test/StringProcessing/SILGen/regex_literal_silgen.swift @@ -1,8 +1,8 @@ // RUN: %target-swift-frontend -emit-silgen -enable-experimental-string-processing %s | %FileCheck %s // REQUIRES: swift_in_compiler -var s = '/abc/' -// CHECK: [[REGEX_STR_LITERAL:%[0-9]+]] = string_literal utf8 "'/abc/'" +var s = #/abc/# +// CHECK: [[REGEX_STR_LITERAL:%[0-9]+]] = string_literal utf8 "#/abc/#" // CHECK: [[STRING_INIT:%[0-9]+]] = function_ref @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String // CHECK: [[REGEX_STR:%[0-9]+]] = apply [[STRING_INIT]]([[REGEX_STR_LITERAL]] diff --git a/test/StringProcessing/Sema/regex_literal_type_inference.swift b/test/StringProcessing/Sema/regex_literal_type_inference.swift index ebceb1897d209..bd645850886a8 100644 --- a/test/StringProcessing/Sema/regex_literal_type_inference.swift +++ b/test/StringProcessing/Sema/regex_literal_type_inference.swift @@ -1,13 +1,13 @@ // RUN: %target-typecheck-verify-swift -enable-experimental-string-processing // REQUIRES: swift_in_compiler -let r0 = '/./' +let r0 = #/./# let _: Regex = r0 func takesRegex(_: Regex) {} -takesRegex('//') // okay +takesRegex(#//#) // okay -let r1 = '/.(.)/' +let r1 = #/.(.)/# // Note: We test its type with a separate statement so that we know the type // checker inferred the regex's type independently without contextual types. let _: Regex<(Substring, Substring)>.Type = type(of: r1) @@ -15,34 +15,34 @@ let _: Regex<(Substring, Substring)>.Type = type(of: r1) struct S {} // expected-error @+2 {{cannot assign value of type 'Regex<(Substring, Substring)>' to type 'Regex'}} // expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, Substring)' and 'S') are expected to be equal}} -let r2: Regex = '/.(.)/' +let r2: Regex = #/.(.)/# -let r3 = '/(.)(.)/' +let r3 = #/(.)(.)/# let _: Regex<(Substring, Substring, Substring)>.Type = type(of: r3) -let r4 = '/(?