Skip to content

Commit f82156b

Browse files
authored
Merge pull request #1993 from rust-lang/TC/add-grammar-comments-support
Add support to grammar for single line comments
2 parents b3ce606 + b3b582c commit f82156b

File tree

7 files changed

+124
-75
lines changed

7 files changed

+124
-75
lines changed

docs/grammar.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ BACKTICK -> U+0060
2727
2828
LF -> U+000A
2929
30-
Production -> `@root`? Name ` ->` Expression
30+
Production ->
31+
( Comment LF )*
32+
`@root`? Name ` ->` Expression
3133
3234
Name -> <Alphanumeric or `_`>+
3335
@@ -55,6 +57,7 @@ Expr1 ->
5557
Unicode
5658
| NonTerminal
5759
| Break
60+
| Comment
5861
| Terminal
5962
| Charset
6063
| Prose
@@ -67,6 +70,8 @@ NonTerminal -> Name
6770
6871
Break -> LF ` `+
6972
73+
Comment -> `//` ~[LF]+
74+
7075
Terminal -> BACKTICK ~[LF]+ BACKTICK
7176
7277
Charset -> `[` (` `* Characters)+ ` `* `]`
@@ -96,6 +101,7 @@ The general format is a series of productions separated by blank lines. The expr
96101
| Unicode | U+0060 | A single unicode character. |
97102
| NonTerminal | FunctionParameters | A reference to another production by name. |
98103
| Break | | This is used internally by the renderer to detect line breaks and indentation. |
104+
| Comment | // Single line comment. | A comment extending to the end of the line. |
99105
| Terminal | \`example\` | This is a sequence of exact characters, surrounded by backticks. |
100106
| Charset | [ \`A\`-\`Z\` \`0\`-\`9\` \`_\` ] | A choice from a set of characters, space separated. There are three different forms. |
101107
| CharacterRange | [ \`A\`-\`Z\` ] | A range of characters; each character should be in backticks. |

mdbook-spec/src/grammar.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ pub struct Grammar {
2222
#[derive(Debug)]
2323
pub struct Production {
2424
name: String,
25+
/// Comments and breaks that precede the production name.
26+
comments: Vec<Expression>,
2527
/// Category is from the markdown lang string, and defines how it is
2628
/// grouped and organized on the summary page.
2729
category: String,
@@ -70,6 +72,8 @@ enum ExpressionKind {
7072
///
7173
/// Used by the renderer to help format and structure the grammar.
7274
Break(usize),
75+
/// `// Single line comment.`
76+
Comment(String),
7377
/// ``[`A`-`Z` `_` LF]``
7478
Charset(Vec<Characters>),
7579
/// ``~[` ` LF]``
@@ -135,6 +139,7 @@ impl Expression {
135139
ExpressionKind::Terminal(_)
136140
| ExpressionKind::Prose(_)
137141
| ExpressionKind::Break(_)
142+
| ExpressionKind::Comment(_)
138143
| ExpressionKind::Unicode(_) => {}
139144
ExpressionKind::Charset(set) => {
140145
for ch in set {

mdbook-spec/src/grammar/parser.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ impl Parser<'_> {
122122
}
123123

124124
fn parse_production(&mut self, category: &str, path: &Path) -> Result<Production> {
125+
let mut comments = Vec::new();
126+
while let Ok(comment) = self.parse_comment() {
127+
self.expect("\n", "expected newline")?;
128+
comments.push(Expression::new_kind(comment));
129+
comments.push(Expression::new_kind(ExpressionKind::Break(0)));
130+
}
125131
let is_root = self.parse_is_root();
126132
self.space0();
127133
let name = self
@@ -133,6 +139,7 @@ impl Parser<'_> {
133139
};
134140
Ok(Production {
135141
name,
142+
comments,
136143
category: category.to_string(),
137144
expression,
138145
path: path.to_owned(),
@@ -218,6 +225,8 @@ impl Parser<'_> {
218225
bail!(self, "expected indentation on next line");
219226
}
220227
ExpressionKind::Break(space.len())
228+
} else if next == b'/' {
229+
self.parse_comment()?
221230
} else if next == b'`' {
222231
self.parse_terminal()?
223232
} else if next == b'[' {
@@ -269,6 +278,13 @@ impl Parser<'_> {
269278
Ok(term)
270279
}
271280

281+
/// Parse e.g. `// Single line comment.`.
///
/// Requires the input at the current position to begin with `//`; if it
/// does not, the error from `expect` (message "expected `//`") is propagated.
/// Everything after the `//` up to the next `\n` is stored verbatim in the
/// returned `ExpressionKind::Comment`, so any space following the slashes is
/// part of the stored text.
///
/// NOTE(review): the documented grammar is ``Comment -> `//` ~[LF]+``, i.e.
/// at least one character after the slashes, but nothing here rejects an
/// empty comment text. Confirm whether a bare `//` should be accepted.
282+
fn parse_comment(&mut self) -> Result<ExpressionKind> {
283+
self.expect("//", "expected `//`")?;
284+
// Presumably `take_while` stops before the first character that fails the
// predicate, leaving the `\n` unconsumed for the caller — verify against
// the helper's definition.
let text = self.take_while(&|x| x != '\n').to_string();
285+
Ok(ExpressionKind::Comment(text))
286+
}
287+
272288
fn parse_charset(&mut self) -> Result<ExpressionKind> {
273289
self.expect("[", "expected opening [")?;
274290
let mut characters = Vec::new();

mdbook-spec/src/grammar/render_markdown.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ impl Production {
4545
.get(&self.name)
4646
.map(|path| path.to_string())
4747
.unwrap_or_else(|| format!("missing"));
48+
for expr in &self.comments {
49+
expr.render_markdown(cx, output);
50+
}
4851
write!(
4952
output,
5053
"<span class=\"grammar-text grammar-production\" id=\"{id}\" \
@@ -77,6 +80,7 @@ impl Expression {
7780
| ExpressionKind::Terminal(_)
7881
| ExpressionKind::Prose(_)
7982
| ExpressionKind::Break(_)
83+
| ExpressionKind::Comment(_)
8084
| ExpressionKind::Charset(_)
8185
| ExpressionKind::NegExpression(_)
8286
| ExpressionKind::Unicode(_) => &self.kind,
@@ -163,6 +167,9 @@ impl Expression {
163167
output.push_str("\\\n");
164168
output.push_str(&"&nbsp;".repeat(*indent));
165169
}
170+
ExpressionKind::Comment(s) => {
171+
write!(output, "<span class=\"grammar-comment\">// {s}</span>").unwrap();
172+
}
166173
ExpressionKind::Charset(set) => charset_render_markdown(cx, set, output),
167174
ExpressionKind::NegExpression(e) => {
168175
output.push('~');

mdbook-spec/src/grammar/render_railroad.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,11 @@ impl Expression {
102102
.map(|e| e.render_railroad(cx, stack))
103103
.filter_map(|n| n)
104104
.collect();
105+
if seq.is_empty() {
106+
return None;
107+
}
105108
let seq: Sequence<Box<dyn Node>> = Sequence::new(seq);
106-
Box::new(seq)
109+
Some(Box::new(seq))
107110
};
108111

109112
// If `stack` is true, split the sequence on Breaks and
@@ -127,16 +130,18 @@ impl Expression {
127130
&es[..]
128131
};
129132

130-
let mut breaks: Vec<_> =
131-
es.split(|e| e.is_break()).map(|es| make_seq(es)).collect();
133+
let mut breaks: Vec<_> = es
134+
.split(|e| e.is_break())
135+
.flat_map(|es| make_seq(es))
136+
.collect();
132137
// If there aren't any breaks, don't bother stacking.
133-
if breaks.len() == 1 {
134-
breaks.pop().unwrap()
135-
} else {
136-
Box::new(Stack::new(breaks))
138+
match breaks.len() {
139+
0 => return None,
140+
1 => breaks.pop().unwrap(),
141+
_ => Box::new(Stack::new(breaks)),
137142
}
138143
} else {
139-
make_seq(&es)
144+
make_seq(&es)?
140145
}
141146
}
142147
// Treat `e?` and `e{..1}` / `e{0..1}` equally.
@@ -205,6 +210,7 @@ impl Expression {
205210
ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
206211
ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
207212
ExpressionKind::Break(_) => return None,
213+
ExpressionKind::Comment(_) => return None,
208214
ExpressionKind::Charset(set) => {
209215
let ns: Vec<_> = set.iter().map(|c| c.render_railroad(cx)).collect();
210216
Box::new(Choice::<Box<dyn Node>>::new(ns))

src/notation.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
2727
| U+xxxx | U+0060 | A single unicode character |
2828
| \<text\> | \<any ASCII char except CR\> | An English description of what should be matched |
2929
| Rule <sub>suffix</sub> | IDENTIFIER_OR_KEYWORD <sub>_except `crate`_</sub> | A modification to the previous rule |
30+
| // Comment. | // Single line comment. | A comment extending to the end of the line. |
3031

3132
Sequences have a higher precedence than `|` alternation.
3233

0 commit comments

Comments
 (0)