From 6c04e0a7aeeee15fc11759dd563aecfe6df194a0 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 26 Jun 2025 14:19:15 -0700 Subject: [PATCH 1/2] Rewrite `macro_rules!` parser to not use the MBE engine itself The `macro_rules!` parser was written to match the series of rules using the macros-by-example (MBE) engine and a hand-written equivalent of the left-hand side of a MBE macro. This was complex to read, difficult to extend, and produced confusing error messages. Because it was using the MBE engine, any parse failure would be reported as if some macro was being applied to the `macro_rules!` invocation itself; for instance, errors would talk about "macro invocation", "macro arguments", and "macro call", when they were actually about the macro *definition*. And in practice, the `macro_rules!` parser only used the MBE engine to extract the left-hand side and right-hand side of each rule as a token tree, and then parsed the rest using a separate parser. Rewrite it to parse the series of rules using a simple loop, instead. This makes it more extensible in the future, and improves error messages. For instance, omitting a semicolon between rules will result in "expected `;`" and "unexpected token", rather than the confusing "no rules expected this token in macro call". This work was greatly aided by pair programming with Vincenzo Palazzo and Eric Holk. --- compiler/rustc_expand/src/mbe/diagnostics.rs | 36 +--- compiler/rustc_expand/src/mbe/macro_parser.rs | 2 - compiler/rustc_expand/src/mbe/macro_rules.rs | 203 +++++------------- compiler/rustc_span/src/symbol.rs | 1 - tests/ui/attributes/crate-type-macro-empty.rs | 2 +- .../attributes/crate-type-macro-empty.stderr | 4 +- tests/ui/macros/missing-semi.stderr | 4 +- tests/ui/parser/issues/issue-7970b.rs | 2 +- tests/ui/parser/issues/issue-7970b.stderr | 4 +- tests/ui/parser/macros-no-semicolon-items.rs | 2 +- .../parser/macros-no-semicolon-items.stderr | 4 +- 11 files changed, 65 insertions(+), 199 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs index 99aa376626d45..c607a3a3652c9 100644 --- a/compiler/rustc_expand/src/mbe/diagnostics.rs +++ b/compiler/rustc_expand/src/mbe/diagnostics.rs @@ -195,38 +195,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> { } } -/// Currently used by macro_rules! compilation to extract a little information from the `Failure` -/// case. -pub(crate) struct FailureForwarder<'matcher> { - expected_token: Option<&'matcher Token>, -} - -impl<'matcher> FailureForwarder<'matcher> { - pub(crate) fn new() -> Self { - Self { expected_token: None } - } -} - -impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> { - type Failure = (Token, u32, &'static str); - - fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure { - (tok, position, msg) - } - - fn description() -> &'static str { - "failure-forwarder" - } - - fn set_expected_token(&mut self, tok: &'matcher Token) { - self.expected_token = Some(tok); - } - - fn get_expected_token(&self) -> Option<&'matcher Token> { - self.expected_token - } -} - pub(super) fn emit_frag_parse_err( mut e: Diag<'_>, parser: &Parser<'_>, @@ -321,7 +289,7 @@ enum ExplainDocComment { }, } -pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) { +fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) { if let Ok(src) = sm.span_to_snippet(span) { if src.starts_with("///") || src.starts_with("/**") { err.subdiagnostic(ExplainDocComment::Outer { span }); @@ -333,7 +301,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For /// other tokens, this is "unexpected token...". -pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> { +fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> { if let Some(expected_token) = expected_token { Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok))) } else { diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 802e43209a51e..3f1fc841ea343 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -536,8 +536,6 @@ impl TtParser { // The separator matches the current token. Advance past it. mp.idx += 1; self.next_mps.push(mp); - } else { - track.set_expected_token(separator); } } &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => { diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index 432ab32474059..234e0257530c5 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag; use rustc_lint_defs::builtin::{ RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS, }; -use rustc_parse::parser::{ParseNtResult, Parser, Recovery}; +use rustc_parse::exp; +use rustc_parse::parser::{Parser, Recovery}; use rustc_session::Session; use rustc_session::parse::ParseSess; use rustc_span::edition::Edition; use rustc_span::hygiene::Transparency; -use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym}; +use rustc_span::{Ident, Span, kw, sym}; use tracing::{debug, instrument, trace, trace_span}; use super::macro_parser::{NamedMatches, NamedParseResult}; @@ -34,8 +35,6 @@ use crate::base::{ SyntaxExtensionKind, TTMacroExpander, }; use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment}; -use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg}; -use crate::mbe::macro_parser::NamedMatch::*; use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser}; use crate::mbe::transcribe::transcribe; use crate::mbe::{self, KleeneOp, macro_check}; @@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> { fn recovery() -> Recovery { Recovery::Forbidden } - - fn set_expected_token(&mut self, _tok: &'matcher Token) {} - fn get_expected_token(&self) -> Option<&'matcher Token> { - None - } } /// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to @@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>( Err(CanRetry::Yes) } -// Note that macro-by-example's input is also matched against a token tree: -// $( $lhs:tt => $rhs:tt );+ -// -// Holy self-referential! - /// Converts a macro item into a syntax extension. pub fn compile_declarative_macro( sess: &Session, @@ -390,153 +379,63 @@ pub fn compile_declarative_macro( }; let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new()); - let lhs_nm = Ident::new(sym::lhs, span); - let rhs_nm = Ident::new(sym::rhs, span); - let tt_spec = NonterminalKind::TT; let macro_rules = macro_def.macro_rules; + let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) }; - // Parse the macro_rules! invocation - - // The pattern that macro_rules matches. - // The grammar for macro_rules! is: - // $( $lhs:tt => $rhs:tt );+ - // ...quasiquoting this would be nice. - // These spans won't matter, anyways - let argument_gram = vec![ - mbe::TokenTree::Sequence( - DelimSpan::dummy(), - mbe::SequenceRepetition { - tts: vec![ - mbe::TokenTree::MetaVarDecl { span, name: lhs_nm, kind: tt_spec }, - mbe::TokenTree::token(token::FatArrow, span), - mbe::TokenTree::MetaVarDecl { span, name: rhs_nm, kind: tt_spec }, - ], - separator: Some(Token::new( - if macro_rules { token::Semi } else { token::Comma }, - span, - )), - kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span), - num_captures: 2, - }, - ), - // to phase into semicolon-termination instead of semicolon-separation - mbe::TokenTree::Sequence( - DelimSpan::dummy(), - mbe::SequenceRepetition { - tts: vec![mbe::TokenTree::token( - if macro_rules { token::Semi } else { token::Comma }, - span, - )], - separator: None, - kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span), - num_captures: 0, - }, - ), - ]; - // Convert it into `MatcherLoc` form. - let argument_gram = mbe::macro_parser::compute_locs(&argument_gram); - - let create_parser = || { - let body = macro_def.body.tokens.clone(); - Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS) - }; - - let parser = create_parser(); - let mut tt_parser = - TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro })); - let argument_map = - match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) { - Success(m) => m, - Failure(()) => { - debug!("failed to parse macro tt"); - // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it - // with another one that gives us the information we need. - // For this we need to reclone the macro body as the previous parser consumed it. - let retry_parser = create_parser(); - - let mut track = diagnostics::FailureForwarder::new(); - let parse_result = - tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track); - let Failure((token, _, msg)) = parse_result else { - unreachable!("matcher returned something other than Failure after retry"); - }; - - let s = parse_failure_msg(&token, track.get_expected_token()); - let sp = token.span.substitute_dummy(span); - let mut err = sess.dcx().struct_span_err(sp, s); - err.span_label(sp, msg); - annotate_doc_comment(&mut err, sess.source_map(), sp); - let guar = err.emit(); - return dummy_syn_ext(guar); - } - Error(sp, msg) => { - let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg); - return dummy_syn_ext(guar); - } - ErrorReported(guar) => { - return dummy_syn_ext(guar); - } - }; + let body = macro_def.body.tokens.clone(); + let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS); + // Don't abort iteration early, so that multiple errors can be reported. let mut guar = None; let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err()); - // Extract the arguments: - let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] { - MatchedSeq(s) => s - .iter() - .map(|m| { - if let MatchedSingle(ParseNtResult::Tt(tt)) = m { - let tt = mbe::quoted::parse( - &TokenStream::new(vec![tt.clone()]), - true, - sess, - node_id, - features, - edition, - ) - .pop() - .unwrap(); - // We don't handle errors here, the driver will abort - // after parsing/expansion. We can report every error in every macro this way. - check_emission(check_lhs_nt_follows(sess, node_id, &tt)); - return tt; - } - sess.dcx().span_bug(span, "wrong-structured lhs") - }) - .collect::>(), - _ => sess.dcx().span_bug(span, "wrong-structured lhs"), - }; - - let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] { - MatchedSeq(s) => s - .iter() - .map(|m| { - if let MatchedSingle(ParseNtResult::Tt(tt)) = m { - return mbe::quoted::parse( - &TokenStream::new(vec![tt.clone()]), - false, - sess, - node_id, - features, - edition, - ) - .pop() - .unwrap(); - } - sess.dcx().span_bug(span, "wrong-structured rhs") - }) - .collect::>(), - _ => sess.dcx().span_bug(span, "wrong-structured rhs"), - }; + let mut lhses = Vec::new(); + let mut rhses = Vec::new(); - for rhs in &rhses { - check_emission(check_rhs(sess, rhs)); + while p.token != token::Eof { + let lhs_tt = p.parse_token_tree(); + let lhs_tt = mbe::quoted::parse( + &TokenStream::new(vec![lhs_tt]), + true, // LHS + sess, + node_id, + features, + edition, + ) + .pop() + .unwrap(); + // We don't handle errors here, the driver will abort after parsing/expansion. We can + // report every error in every macro this way. + check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt)); + check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt))); + if let Err(e) = p.expect(exp!(FatArrow)) { + return dummy_syn_ext(e.emit()); + } + let rhs_tt = p.parse_token_tree(); + let rhs_tt = mbe::quoted::parse( + &TokenStream::new(vec![rhs_tt]), + false, // RHS + sess, + node_id, + features, + edition, + ) + .pop() + .unwrap(); + check_emission(check_rhs(sess, &rhs_tt)); + lhses.push(lhs_tt); + rhses.push(rhs_tt); + if p.token == token::Eof { + break; + } + if let Err(e) = p.expect(exp_sep) { + return dummy_syn_ext(e.emit()); + } } - // Don't abort iteration early, so that errors for multiple lhses can be reported. - for lhs in &lhses { - check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs))); + if lhses.is_empty() { + let guar = sess.dcx().span_err(span, "macros must contain at least one rule"); + return dummy_syn_ext(guar); } check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses)); diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 11463ad354a96..c17411d55f7c2 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1777,7 +1777,6 @@ symbols! { resume, return_position_impl_trait_in_trait, return_type_notation, - rhs, riscv_target_feature, rlib, ropi, diff --git a/tests/ui/attributes/crate-type-macro-empty.rs b/tests/ui/attributes/crate-type-macro-empty.rs index 5ff7fc002fde3..217ff598f7a4c 100644 --- a/tests/ui/attributes/crate-type-macro-empty.rs +++ b/tests/ui/attributes/crate-type-macro-empty.rs @@ -2,6 +2,6 @@ #[crate_type = foo!()] //~^ ERROR cannot find macro `foo` in this scope -macro_rules! foo {} //~ ERROR unexpected end of macro invocation +macro_rules! foo {} //~ ERROR macros must contain at least one rule fn main() {} diff --git a/tests/ui/attributes/crate-type-macro-empty.stderr b/tests/ui/attributes/crate-type-macro-empty.stderr index e48d3d95470d4..130fa454ca19e 100644 --- a/tests/ui/attributes/crate-type-macro-empty.stderr +++ b/tests/ui/attributes/crate-type-macro-empty.stderr @@ -1,8 +1,8 @@ -error: unexpected end of macro invocation +error: macros must contain at least one rule --> $DIR/crate-type-macro-empty.rs:5:1 | LL | macro_rules! foo {} - | ^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments + | ^^^^^^^^^^^^^^^^^^^ error: cannot find macro `foo` in this scope --> $DIR/crate-type-macro-empty.rs:2:16 diff --git a/tests/ui/macros/missing-semi.stderr b/tests/ui/macros/missing-semi.stderr index 0a7afe50059d3..c2e12adbb4b03 100644 --- a/tests/ui/macros/missing-semi.stderr +++ b/tests/ui/macros/missing-semi.stderr @@ -1,8 +1,10 @@ error: expected `;`, found `(` --> $DIR/missing-semi.rs:6:5 | +LL | } + | - expected `;` LL | () => { - | ^ no rules expected this token in macro call + | ^ unexpected token error: aborting due to 1 previous error diff --git a/tests/ui/parser/issues/issue-7970b.rs b/tests/ui/parser/issues/issue-7970b.rs index 1c4abce39598c..ae06aff7cef7e 100644 --- a/tests/ui/parser/issues/issue-7970b.rs +++ b/tests/ui/parser/issues/issue-7970b.rs @@ -1,4 +1,4 @@ fn main() {} macro_rules! test {} -//~^ ERROR unexpected end of macro invocation +//~^ ERROR macros must contain at least one rule diff --git a/tests/ui/parser/issues/issue-7970b.stderr b/tests/ui/parser/issues/issue-7970b.stderr index b23b09e752ce0..4715eb07c6d99 100644 --- a/tests/ui/parser/issues/issue-7970b.stderr +++ b/tests/ui/parser/issues/issue-7970b.stderr @@ -1,8 +1,8 @@ -error: unexpected end of macro invocation +error: macros must contain at least one rule --> $DIR/issue-7970b.rs:3:1 | LL | macro_rules! test {} - | ^^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments + | ^^^^^^^^^^^^^^^^^^^^ error: aborting due to 1 previous error diff --git a/tests/ui/parser/macros-no-semicolon-items.rs b/tests/ui/parser/macros-no-semicolon-items.rs index 3afc275d61a2b..86889279cea6a 100644 --- a/tests/ui/parser/macros-no-semicolon-items.rs +++ b/tests/ui/parser/macros-no-semicolon-items.rs @@ -1,5 +1,5 @@ macro_rules! foo() //~ ERROR semicolon - //~| ERROR unexpected end of macro + //~| ERROR macros must contain at least one rule macro_rules! bar { ($($tokens:tt)*) => {} diff --git a/tests/ui/parser/macros-no-semicolon-items.stderr b/tests/ui/parser/macros-no-semicolon-items.stderr index 07fa2439df504..f8f3ed83688ce 100644 --- a/tests/ui/parser/macros-no-semicolon-items.stderr +++ b/tests/ui/parser/macros-no-semicolon-items.stderr @@ -38,11 +38,11 @@ help: add a semicolon LL | ); | + -error: unexpected end of macro invocation +error: macros must contain at least one rule --> $DIR/macros-no-semicolon-items.rs:1:1 | LL | macro_rules! foo() - | ^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments + | ^^^^^^^^^^^^^^^^^^ error: aborting due to 3 previous errors From 07760822db8dab9dc2bec7d2af23d804ab19da22 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 26 Jun 2025 15:06:31 -0700 Subject: [PATCH 2/2] mbe: Fold calls to `check_meta_variables` into the parser loop --- compiler/rustc_expand/src/mbe/macro_check.rs | 23 ++++++-------------- compiler/rustc_expand/src/mbe/macro_rules.rs | 3 +-- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs index dc2d46c4a1419..bbdff866feba4 100644 --- a/compiler/rustc_expand/src/mbe/macro_check.rs +++ b/compiler/rustc_expand/src/mbe/macro_check.rs @@ -105,8 +105,6 @@ //! stored when entering a macro definition starting from the state in which the meta-variable is //! bound. -use std::iter; - use rustc_ast::token::{Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::{DUMMY_NODE_ID, NodeId}; use rustc_data_structures::fx::FxHashMap; @@ -190,29 +188,22 @@ struct MacroState<'a> { ops: SmallVec<[KleeneToken; 1]>, } -/// Checks that meta-variables are used correctly in a macro definition. +/// Checks that meta-variables are used correctly in one rule of a macro definition. /// /// Arguments: /// - `psess` is used to emit diagnostics and lints /// - `node_id` is used to emit lints -/// - `span` is used when no spans are available -/// - `lhses` and `rhses` should have the same length and represent the macro definition +/// - `lhs` and `rhs` represent the rule pub(super) fn check_meta_variables( psess: &ParseSess, node_id: NodeId, - span: Span, - lhses: &[TokenTree], - rhses: &[TokenTree], + lhs: &TokenTree, + rhs: &TokenTree, ) -> Result<(), ErrorGuaranteed> { - if lhses.len() != rhses.len() { - psess.dcx().span_bug(span, "length mismatch between LHSes and RHSes") - } let mut guar = None; - for (lhs, rhs) in iter::zip(lhses, rhses) { - let mut binders = Binders::default(); - check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar); - check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar); - } + let mut binders = Binders::default(); + check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar); + check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar); guar.map_or(Ok(()), Err) } diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index 234e0257530c5..dad2fd99ef205 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -423,6 +423,7 @@ pub fn compile_declarative_macro( .pop() .unwrap(); check_emission(check_rhs(sess, &rhs_tt)); + check_emission(macro_check::check_meta_variables(&sess.psess, node_id, &lhs_tt, &rhs_tt)); lhses.push(lhs_tt); rhses.push(rhs_tt); if p.token == token::Eof { @@ -438,8 +439,6 @@ pub fn compile_declarative_macro( return dummy_syn_ext(guar); } - check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses)); - let transparency = find_attr!(attrs, AttributeKind::MacroTransparency(x) => *x) .unwrap_or(Transparency::fallback(macro_rules));