From 166688dc46708271564777e9081ea98dd517b689 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 10 May 2022 16:04:16 +0100 Subject: [PATCH 1/3] Allow backticks on operators --- include/swift/Parse/Lexer.h | 4 + include/swift/Parse/Token.h | 14 +++- lib/Parse/Lexer.cpp | 123 +++++++++++++++++++------------ lib/Parse/ParseDecl.cpp | 4 +- test/Parse/operator_escape.swift | 26 +++++++ 5 files changed, 120 insertions(+), 51 deletions(-) create mode 100644 test/Parse/operator_escape.swift diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index 6a035e01bf273..dc05853b97c81 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -608,6 +608,10 @@ class Lexer { void formStringLiteralToken(const char *TokStart, bool IsMultilineString, unsigned CustomDelimiterLen); + /// Form an operator token starting at \p TokStart. \p OperEnd is the last + /// character, not including backticks. + void formOperatorToken(const char *TokStart, const char *OperEnd); + /// Advance to the end of the line. /// If EatNewLine is true, CurPtr will be at end of newline character. /// Otherwise, CurPtr will be at newline character. diff --git a/include/swift/Parse/Token.h b/include/swift/Parse/Token.h index f48e003904f95..8e6acd2a92089 100644 --- a/include/swift/Parse/Token.h +++ b/include/swift/Parse/Token.h @@ -41,6 +41,8 @@ class Token { /// Whether this token is an escaped `identifier` token. 
unsigned EscapedIdentifier : 1; + + unsigned EscapedOperator : 1; /// Modifiers for string literals unsigned MultilineString : 1; @@ -65,8 +67,8 @@ class Token { public: Token(tok Kind, StringRef Text, unsigned CommentLength = 0) : Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false), - MultilineString(false), CustomDelimiterLen(0), - CommentLength(CommentLength), Text(Text) {} + EscapedOperator(false), MultilineString(false), + CustomDelimiterLen(0), CommentLength(CommentLength), Text(Text) {} Token() : Token(tok::NUM_TOKENS, {}, 0) {} @@ -128,6 +130,11 @@ class Token { "only identifiers can be escaped identifiers"); EscapedIdentifier = value; } + + bool isEscapedOperator() const { return EscapedOperator; } + void setEscapedOperator(bool value) { + EscapedOperator = value; + } bool isContextualKeyword(StringRef ContextKW) const { return isAny(tok::identifier, tok::contextual_keyword) && @@ -276,7 +283,7 @@ class Token { } StringRef getText() const { - if (EscapedIdentifier) { + if (EscapedIdentifier || EscapedOperator) { // Strip off the backticks on either side. assert(Text.front() == '`' && Text.back() == '`'); return Text.slice(1, Text.size() - 1); @@ -292,6 +299,7 @@ class Token { Text = T; this->CommentLength = CommentLength; EscapedIdentifier = false; + EscapedOperator = false; this->MultilineString = false; this->CustomDelimiterLen = 0; assert(this->CustomDelimiterLen == CustomDelimiterLen && diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 00c8bddcd8baf..1eb86321c24d7 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -792,49 +792,7 @@ static bool rangeContainsPlaceholderEnd(const char *CurPtr, return false; } -/// lexOperatorIdentifier - Match identifiers formed out of punctuation. 
-void Lexer::lexOperatorIdentifier() { - const char *TokStart = CurPtr-1; - CurPtr = TokStart; - bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd); - assert(didStart && "unexpected operator start"); - (void) didStart; - - do { - if (CurPtr != BufferEnd && InSILBody && - (*CurPtr == '!' || *CurPtr == '?')) - // When parsing SIL body, '!' and '?' are special token and can't be - // in the middle of an operator. - break; - - // '.' cannot appear in the middle of an operator unless the operator - // started with a '.'. - if (*CurPtr == '.' && *TokStart != '.') - break; - if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) && - rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) { - break; - } - - // If we are lexing a `/.../` regex literal, we don't consider `/` to be an - // operator character. - if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None && - *CurPtr == '/') { - break; - } - } while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd)); - - if (CurPtr-TokStart > 2) { - // If there is a "//" or "/*" in the middle of an identifier token, - // it starts a comment. - for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) { - if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) { - CurPtr = Ptr; - break; - } - } - } - +void Lexer::formOperatorToken(const char *TokStart, const char *OperEnd) { // Decide between the binary, prefix, and postfix cases. // It's binary if either both sides are bound or both sides are not bound. // Otherwise, it's postfix if left-bound and prefix if right-bound. @@ -842,7 +800,7 @@ void Lexer::lexOperatorIdentifier() { bool rightBound = isRightBound(CurPtr, leftBound, CodeCompletionPtr); // Match various reserved words. - if (CurPtr-TokStart == 1) { + if (OperEnd-TokStart == 1) { switch (TokStart[0]) { case '=': // Refrain from emitting this message in operator name position. 
@@ -901,7 +859,7 @@ void Lexer::lexOperatorIdentifier() { return formToken(tok::question_postfix, TokStart); return formToken(tok::question_infix, TokStart); } - } else if (CurPtr-TokStart == 2) { + } else if (OperEnd-TokStart == 2) { switch ((TokStart[0] << 8) | TokStart[1]) { case ('-' << 8) | '>': // -> return formToken(tok::arrow, TokStart); @@ -912,7 +870,7 @@ void Lexer::lexOperatorIdentifier() { } else { // Verify there is no "*/" in the middle of the identifier token, we reject // it as potentially ending a block comment. - auto Pos = StringRef(TokStart, CurPtr-TokStart).find("*/"); + auto Pos = StringRef(TokStart, OperEnd-TokStart).find("*/"); if (Pos != StringRef::npos) { diagnose(TokStart+Pos, diag::lex_unexpected_block_comment_end); return formToken(tok::unknown, TokStart); @@ -926,6 +884,75 @@ void Lexer::lexOperatorIdentifier() { return formToken(leftBound ? tok::oper_postfix : tok::oper_prefix, TokStart); } +/// lexOperatorIdentifier - Match identifiers formed out of punctuation. +void Lexer::lexOperatorIdentifier() { + auto *const TokStart = CurPtr-1; + + auto HadBacktick = (*TokStart == '`'); + if (!HadBacktick) + CurPtr = TokStart; + + auto *const OperStart = CurPtr; + + bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd); + assert(didStart && "unexpected operator start"); + (void) didStart; + + do { + if (CurPtr != BufferEnd && InSILBody && + (*CurPtr == '!' || *CurPtr == '?')) + // When parsing SIL body, '!' and '?' are special tokens and can't be + // in the middle of an operator. + break; + + // '.' cannot appear in the middle of an operator unless the operator + // started with a '.' (the first character after any escaping backtick). + if (*CurPtr == '.' && *OperStart != '.') + break; + if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) && + rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) { + break; + } + + // If we are lexing a `/.../` regex literal, we don't consider `/` to be an + // operator character. 
+ if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None && + *CurPtr == '/') { + break; + } + } while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd)); + + if (CurPtr-TokStart > 2) { + // If there is a "//" or "/*" in the middle of an identifier token, + // it starts a comment. + for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) { + if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) { + CurPtr = Ptr; + break; + } + } + } + + auto *const OperEnd = CurPtr; + if (HadBacktick) { + if (*OperEnd != '`') { + // The backtick is punctuation. + CurPtr = OperStart; + return formToken(tok::backtick, TokStart); + } + ++CurPtr; + } + + formOperatorToken(TokStart, OperEnd); + if (HadBacktick) { + // If this token is at ArtificialEOF, it's forced to be tok::eof. Don't mark + // this as escaped-operator in this case. Also don't mark if we had + // something unrecoverable. + if (!NextToken.is(tok::eof) && !NextToken.is(tok::unknown)) + NextToken.setEscapedOperator(true); + } +} + /// lexDollarIdent - Match $[0-9a-zA-Z_$]+ void Lexer::lexDollarIdent() { const char *tokStart = CurPtr-1; @@ -2652,6 +2679,10 @@ void Lexer::lexImpl() { return lexStringLiteral(); case '`': + auto *Tmp = CurPtr; + if (advanceIfValidStartOfOperator(Tmp, BufferEnd)) + return lexOperatorIdentifier(); + return lexEscapedIdentifier(); } } diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 14c7eb8a5f2cd..a09a4e7c1d40e 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8535,8 +8535,8 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { // Postfix operators starting with ? or ! conflict with builtin // unwrapping operators. if (Attributes.hasAttribute()) - if (!Tok.getText().empty() && (Tok.getRawText().front() == '?' || - Tok.getRawText().front() == '!')) + if (!Tok.getText().empty() && (Tok.getText().front() == '?' 
|| + Tok.getText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); // Prefix operators may not contain the `/` character when `/.../` regex diff --git a/test/Parse/operator_escape.swift b/test/Parse/operator_escape.swift new file mode 100644 index 0000000000000..703f2b717cbb0 --- /dev/null +++ b/test/Parse/operator_escape.swift @@ -0,0 +1,26 @@ +// RUN: %target-typecheck-verify-swift + +postfix operator `!` +// expected-error@-1 {{cannot declare a custom postfix '!' operator}} +// expected-error@-2 {{postfix operator names starting with '?' or '!' are disallowed to avoid collisions with built-in unwrapping operators}} + +postfix operator `?` +// expected-error@-1 {{cannot declare a custom postfix '?' operator}} +// expected-error@-2 {{postfix operator names starting with '?' or '!' are disallowed to avoid collisions with built-in unwrapping operators}} + +postfix operator `?|` +// expected-error@-1 {{postfix operator names starting with '?' or '!' are disallowed to avoid collisions with built-in unwrapping operators}} + +let _ = 1 `+` 2 + +func foo(_ x: Int?) -> Int { + // TODO: This gets lexed separately, so gets turned into a non-special operator. + // Should we support this? + x`!` // expected-error {{'!' 
is not a postfix unary operator}} +} + +func bar(_ x: Bool) -> Bool { + `!`x +} + +`+ // expected-error {{expected expression}} From 493b4c5af5912228a32dc88a99a735e3e3d939df Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 10 May 2022 16:04:17 +0100 Subject: [PATCH 2/3] Re-allow prefix operators containing `/` --- include/swift/AST/DiagnosticsParse.def | 3 --- lib/Parse/ParseDecl.cpp | 7 ------- 2 files changed, 10 deletions(-) diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index 1459a35c97854..c1b0879aec041 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none, ERROR(regex_literal_parsing_error,none, "%0", (StringRef)) -ERROR(prefix_slash_not_allowed,none, - "prefix operator may not contain '/'", ()) - //------------------------------------------------------------------------------ // MARK: Lexer diagnostics //------------------------------------------------------------------------------ diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index a09a4e7c1d40e..7765c87be3149 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8539,13 +8539,6 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { Tok.getText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); - // Prefix operators may not contain the `/` character when `/.../` regex - // literals are enabled. - if (Context.LangOpts.EnableBareSlashRegexLiterals) { - if (Attributes.hasAttribute() && Tok.getText().contains("/")) - diagnose(Tok, diag::prefix_slash_not_allowed); - } - // A common error is to try to define an operator with something in the // unicode plane considered to be an operator, or to try to define an // operator like "not". Analyze and diagnose this specifically. 
From 07b86e940e242d273a46d4ba5a49e56b43ab0af5 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 10 May 2022 16:04:17 +0100 Subject: [PATCH 3/3] Lenient prefix `/` parsing --- include/swift/Parse/Parser.h | 5 +- lib/Parse/ParseExpr.cpp | 96 +++++++++++-------- .../Frontend/enable-flag.swift | 2 +- .../Parse/forward-slash-regex.swift | 39 ++++++-- .../Parse/regex_parse_error.swift | 8 +- 5 files changed, 92 insertions(+), 58 deletions(-) diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 04b9288a11b57..415e97c96e4c1 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -1763,10 +1763,7 @@ class Parser { /// Try re-lex a '/' operator character as a regex literal. This should be /// called when parsing in an expression position to ensure a regex literal is /// correctly parsed. - /// - /// If \p mustBeRegex is set to true, a regex literal will always be lexed if - /// enabled. Otherwise, it will not be lexed if it may be ambiguous. - void tryLexRegexLiteral(bool mustBeRegex); + void tryLexRegexLiteral(bool forUnappliedOperator); void validateCollectionElement(ParserResult element); diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 14769d47bd704..37e8582910907 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -513,7 +513,7 @@ ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { UnresolvedDeclRefExpr *Operator; // First check to see if we have the start of a regex literal `/.../`. 
- tryLexRegexLiteral(/*mustBeRegex*/ true); + tryLexRegexLiteral(/*forUnappliedOperator*/ false); switch (Tok.getKind()) { default: @@ -880,56 +880,70 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() { return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc)); } -void Parser::tryLexRegexLiteral(bool mustBeRegex) { +void Parser::tryLexRegexLiteral(bool forUnappliedOperator) { if (!Context.LangOpts.EnableBareSlashRegexLiterals) return; + // Never a regex literal. + if (Tok.isEscapedOperator()) + return; + // Check to see if we have a regex literal `/.../`, optionally with a prefix // operator e.g `!/.../`. + bool mustBeRegex = false; switch (Tok.getKind()) { case tok::oper_prefix: + // Prefix operators may contain `/` characters, so this may not be a regex, + // and as such need to make sure we have a closing `/`. The first character + // heuristics aren't relevant here as a right-bound operator will not have + // a space, tab, or `)` character. + break; case tok::oper_binary_spaced: - case tok::oper_binary_unspaced: { - // Check to see if we have an operator containing '/'. - auto slashIdx = Tok.getText().find("/"); - if (slashIdx == StringRef::npos) - break; + case tok::oper_binary_unspaced: + // When re-lexing for a 'proper' expression, binary operators are always + // invalid, so we can be confident in always lexing a regex literal. + mustBeRegex = !forUnappliedOperator; + break; + default: + // We only re-lex regex literals for operator tokens. + return; + } - CancellableBacktrackingScope backtrack(*this); - { - Optional regexScope; - regexScope.emplace(*L, mustBeRegex); - - // Try re-lex as a `/.../` regex literal, this will split an operator if - // necessary. - L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true); - - // If we didn't split a prefix operator, reset the regex lexing scope. - // Otherwise, we want to keep it in place for the next token. 
- auto didSplit = L->peekNextToken().getLength() == slashIdx; - if (!didSplit) - regexScope.reset(); - - // Discard the current token, which will be replaced by the re-lexed - // token, which will either be a regex literal token, a prefix operator, - // or the original unchanged token. - discardToken(); - - // If we split a prefix operator from the regex literal, and are not sure - // whether this should be a regex, backtrack if we didn't end up lexing a - // regex literal. - if (didSplit && !mustBeRegex && - !L->peekNextToken().is(tok::regex_literal)) { - return; - } + // Check to see if we have an operator containing '/'. + auto slashIdx = Tok.getText().find("/"); + if (slashIdx == StringRef::npos) + return; + + CancellableBacktrackingScope backtrack(*this); + { + Optional regexScope; + regexScope.emplace(*L, mustBeRegex); + + // Try re-lex as a `/.../` regex literal, this will split an operator if + // necessary. + L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true); + + // If we didn't split a prefix operator, reset the regex lexing scope. + // Otherwise, we want to keep it in place for the next token. + auto didSplit = L->peekNextToken().getLength() == slashIdx; + if (!didSplit) + regexScope.reset(); + + // Discard the current token, which will be replaced by the re-lexed + // token, which will either be a regex literal token, a prefix operator, + // or the original unchanged token. + discardToken(); - // Otherwise, accept the result. - backtrack.cancelBacktrack(); + // If we split a prefix operator from the regex literal, and are not sure + // whether this should be a regex, backtrack if we didn't end up lexing a + // regex literal. + if (didSplit && !mustBeRegex && + !L->peekNextToken().is(tok::regex_literal)) { + return; } - break; - } - default: - break; + + // Otherwise, accept the result. 
+ backtrack.cancelBacktrack(); } } @@ -3226,7 +3240,7 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok, // First check to see if we have the start of a regex literal `/.../`. We // need to do this before handling unapplied operator references, as e.g // `(/, /)` might be a regex literal. - tryLexRegexLiteral(/*mustBeRegex*/ false); + tryLexRegexLiteral(/*forUnappliedOperator*/ true); // See if we have an operator decl ref '()'. The operator token in // this case lexes as a binary operator because it neither leads nor diff --git a/test/StringProcessing/Frontend/enable-flag.swift b/test/StringProcessing/Frontend/enable-flag.swift index bd43ba49e235f..601cdb71e1a59 100644 --- a/test/StringProcessing/Frontend/enable-flag.swift +++ b/test/StringProcessing/Frontend/enable-flag.swift @@ -4,7 +4,7 @@ // REQUIRES: swift_in_compiler -prefix operator / // expected-error {{prefix operator may not contain '/'}} +prefix operator / _ = /x/ _ = #/x/# diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift index 113f826d94117..4ae4b63f9b0ba 100644 --- a/test/StringProcessing/Parse/forward-slash-regex.swift +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -2,9 +2,11 @@ // REQUIRES: swift_in_compiler // REQUIRES: concurrency -prefix operator / // expected-error {{prefix operator may not contain '/'}} -prefix operator ^/ // expected-error {{prefix operator may not contain '/'}} -prefix operator /^/ // expected-error {{prefix operator may not contain '/'}} +prefix operator / +prefix operator ^/ +prefix operator /^/ + +prefix func ^/ (_ x: T) -> T { x } // expected-note {{'^/' declared here}} prefix operator !! prefix func !! (_ x: T) -> T { x } @@ -53,8 +55,9 @@ do { // expected-error@-3 {{'/' is not a postfix unary operator}} } +// No closing '/' so a prefix operator. 
_ = /x -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = !/x/ // expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} @@ -250,13 +253,15 @@ _ = await /x / // expected-warning {{no 'async' operations occur within 'await' // written a comment and is still in the middle of writing the characters before // it. _ = /x// comment -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = /x // comment -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = /x/*comment*/ -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} + +// MARK: Unapplied operators // These become regex literals, unless surrounded in parens. func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}} @@ -320,6 +325,26 @@ let arr: [Double] = [2, 3, 4] _ = arr.reduce(1, /) / 3 _ = arr.reduce(1, /) + arr.reduce(1, /) +// MARK: Backticks behavior + +// This is a prefix operator, even if there is a closing '/'. +_ = `/`x +// expected-error@-1 {{'/' is not a prefix unary operator}} + +_ = `/`x/ +// expected-error@-1 {{'/' is not a prefix unary operator}} +// expected-error@-2 {{'/' is not a postfix unary operator}} + +_ = ^/x/ +// expected-error@-1 {{'^' is not a prefix unary operator}} + +_ = `^/`x/ +// expected-error@-1 {{'/' is not a postfix unary operator}} + +_ = `!!`/x/ + +// MARK: Starting characters + // Fine. 
_ = /./ diff --git a/test/StringProcessing/Parse/regex_parse_error.swift b/test/StringProcessing/Parse/regex_parse_error.swift index 80e428469256d..f75f106bd92c4 100644 --- a/test/StringProcessing/Parse/regex_parse_error.swift +++ b/test/StringProcessing/Parse/regex_parse_error.swift @@ -30,17 +30,15 @@ _ = #/\(?'abc/# do { _ = /\ / - // expected-error@-2:7 {{unterminated regex literal}} - // expected-error@-3:9 {{expected escape sequence}} -} // expected-error@:1 {{expected expression after operator}} + // expected-error@-1:3 {{expected expression path in Swift key path}} +} do { _ = #/\ /# // expected-error@-2:7 {{unterminated regex literal}} // expected-error@-3:10 {{expected escape sequence}} - // expected-error@-3:3 {{unterminated regex literal}} - // expected-warning@-4:3 {{regular expression literal is unused}} + // expected-error@-3:4 {{expected expression}} } func foo(_ x: T, _ y: T) {}