Merge pull request #1993 from rust-lang/TC/add-grammar-comments-support

ehuss · web-flow · commit f82156b8c3a7 · 2025-09-08T18:07:29.000Z
Add support to grammar for single line comments
diff --git a/docs/grammar.md b/docs/grammar.md
@@ -27,7 +27,9 @@ BACKTICK -> U+0060
 
 LF -> U+000A
 
-Production -> `@root`? Name ` ->` Expression
+Production ->
+    ( Comment LF )*
+    `@root`? Name ` ->` Expression
 
 Name -> <Alphanumeric or `_`>+
 
@@ -55,6 +57,7 @@ Expr1 ->
       Unicode
     | NonTerminal
     | Break
+    | Comment
     | Terminal
     | Charset
     | Prose
@@ -67,6 +70,8 @@ NonTerminal -> Name
 
 Break -> LF ` `+
 
+Comment -> `//` ~[LF]+
+
 Terminal -> BACKTICK ~[LF]+ BACKTICK
 
 Charset -> `[` (` `* Characters)+ ` `* `]`
@@ -96,6 +101,7 @@ The general format is a series of productions separated by blank lines. The expr
 | Unicode | U+0060 | A single unicode character. |
 | NonTerminal | FunctionParameters | A reference to another production by name. |
 | Break | | This is used internally by the renderer to detect line breaks and indentation. |
+| Comment | // Single line comment. | A comment extending to the end of the line. |
 | Terminal | \`example\` | This is a sequence of exact characters, surrounded by backticks |
 | Charset | [ \`A\`-\`Z\` \`0\`-\`9\` \`_\` ] | A choice from a set of characters, space separated. There are three different forms. |
 | CharacterRange | [ \`A\`-\`Z\` ] | A range of characters, each character should be in backticks.
diff --git a/mdbook-spec/src/grammar.rs b/mdbook-spec/src/grammar.rs
@@ -22,6 +22,8 @@ pub struct Grammar {
 #[derive(Debug)]
 pub struct Production {
     name: String,
+    /// Comments and breaks that precede the production name.
+    comments: Vec<Expression>,
     /// Category is from the markdown lang string, and defines how it is
     /// grouped and organized on the summary page.
     category: String,
@@ -70,6 +72,8 @@ enum ExpressionKind {
     ///
     /// Used by the renderer to help format and structure the grammar.
     Break(usize),
+    /// `// Single line comment.`
+    Comment(String),
     /// ``[`A`-`Z` `_` LF]``
     Charset(Vec<Characters>),
     /// ``~[` ` LF]``
@@ -135,6 +139,7 @@ impl Expression {
             ExpressionKind::Terminal(_)
             | ExpressionKind::Prose(_)
             | ExpressionKind::Break(_)
+            | ExpressionKind::Comment(_)
             | ExpressionKind::Unicode(_) => {}
             ExpressionKind::Charset(set) => {
                 for ch in set {
diff --git a/mdbook-spec/src/grammar/parser.rs b/mdbook-spec/src/grammar/parser.rs
@@ -122,6 +122,12 @@ impl Parser<'_> {
     }
 
     fn parse_production(&mut self, category: &str, path: &Path) -> Result<Production> {
+        let mut comments = Vec::new();
+        while let Ok(comment) = self.parse_comment() {
+            self.expect("\n", "expected newline")?;
+            comments.push(Expression::new_kind(comment));
+            comments.push(Expression::new_kind(ExpressionKind::Break(0)));
+        }
         let is_root = self.parse_is_root();
         self.space0();
         let name = self
@@ -133,6 +139,7 @@ impl Parser<'_> {
         };
         Ok(Production {
             name,
+            comments,
             category: category.to_string(),
             expression,
             path: path.to_owned(),
@@ -218,6 +225,8 @@ impl Parser<'_> {
                 bail!(self, "expected indentation on next line");
             }
             ExpressionKind::Break(space.len())
+        } else if next == b'/' {
+            self.parse_comment()?
         } else if next == b'`' {
             self.parse_terminal()?
         } else if next == b'[' {
@@ -269,6 +278,13 @@ impl Parser<'_> {
         Ok(term)
     }
 
+    /// Parse e.g. `// Single line comment.`; the payload is the text after `//`, newline excluded.
+    fn parse_comment(&mut self) -> Result<ExpressionKind> {
+        self.expect("//", "expected `//`")?;
+        let text = self.take_while(&|x| x != '\n').to_string(); // newline is left for the caller
+        Ok(ExpressionKind::Comment(text))
+    }
+
     fn parse_charset(&mut self) -> Result<ExpressionKind> {
         self.expect("[", "expected opening [")?;
         let mut characters = Vec::new();
diff --git a/mdbook-spec/src/grammar/render_markdown.rs b/mdbook-spec/src/grammar/render_markdown.rs
@@ -45,6 +45,9 @@ impl Production {
             .get(&self.name)
             .map(|path| path.to_string())
             .unwrap_or_else(|| format!("missing"));
+        for expr in &self.comments {
+            expr.render_markdown(cx, output);
+        }
         write!(
             output,
             "<span class=\"grammar-text grammar-production\" id=\"{id}\" \
@@ -77,6 +80,7 @@ impl Expression {
             | ExpressionKind::Terminal(_)
             | ExpressionKind::Prose(_)
             | ExpressionKind::Break(_)
+            | ExpressionKind::Comment(_)
             | ExpressionKind::Charset(_)
             | ExpressionKind::NegExpression(_)
             | ExpressionKind::Unicode(_) => &self.kind,
@@ -163,6 +167,9 @@ impl Expression {
                 output.push_str("\\\n");
                 output.push_str(&"&nbsp;".repeat(*indent));
             }
+            ExpressionKind::Comment(s) => {
+                write!(output, "<span class=\"grammar-comment\">// {s}</span>").unwrap();
+            }
             ExpressionKind::Charset(set) => charset_render_markdown(cx, set, output),
             ExpressionKind::NegExpression(e) => {
                 output.push('~');
diff --git a/mdbook-spec/src/grammar/render_railroad.rs b/mdbook-spec/src/grammar/render_railroad.rs
@@ -102,8 +102,11 @@ impl Expression {
                                 .map(|e| e.render_railroad(cx, stack))
                                 .filter_map(|n| n)
                                 .collect();
+                            if seq.is_empty() {
+                                return None;
+                            }
                             let seq: Sequence<Box<dyn Node>> = Sequence::new(seq);
-                            Box::new(seq)
+                            Some(Box::new(seq))
                         };
 
                         // If `stack` is true, split the sequence on Breaks and
@@ -127,16 +130,18 @@ impl Expression {
                                 &es[..]
                             };
 
-                            let mut breaks: Vec<_> =
-                                es.split(|e| e.is_break()).map(|es| make_seq(es)).collect();
+                            let mut breaks: Vec<_> = es
+                                .split(|e| e.is_break())
+                                .flat_map(|es| make_seq(es))
+                                .collect();
                             // If there aren't any breaks, don't bother stacking.
-                            if breaks.len() == 1 {
-                                breaks.pop().unwrap()
-                            } else {
-                                Box::new(Stack::new(breaks))
+                            match breaks.len() {
+                                0 => return None,
+                                1 => breaks.pop().unwrap(),
+                                _ => Box::new(Stack::new(breaks)),
                             }
                         } else {
-                            make_seq(&es)
+                            make_seq(&es)?
                         }
                     }
                     // Treat `e?` and `e{..1}` / `e{0..1}` equally.
@@ -205,6 +210,7 @@ impl Expression {
                     ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
                     ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
                     ExpressionKind::Break(_) => return None,
+                    ExpressionKind::Comment(_) => return None,
                     ExpressionKind::Charset(set) => {
                         let ns: Vec<_> = set.iter().map(|c| c.render_railroad(cx)).collect();
                         Box::new(Choice::<Box<dyn Node>>::new(ns))
diff --git a/src/notation.md b/src/notation.md
@@ -27,6 +27,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
 | U+xxxx            | U+0060                        | A single unicode character                |
 | \<text\>          | \<any ASCII char except CR\>  | An English description of what should be matched |
 | Rule <sub>suffix</sub> | IDENTIFIER_OR_KEYWORD <sub>_except `crate`_</sub> | A modification to the previous rule |
+| // Comment. | // Single line comment. | A comment extending to the end of the line. |
 
 Sequences have a higher precedence than `|` alternation.
 
diff --git a/theme/reference.css b/theme/reference.css