1313from sphinx .util .nodes import make_id
1414
1515if TYPE_CHECKING :
16- from collections .abc import Sequence
17- from typing import Any
16+ from collections .abc import Iterable , Iterator , Sequence
17+ from typing import Any , Final
1818
1919 from docutils .nodes import Node
2020 from sphinx .application import Sphinx
@@ -41,98 +41,140 @@ class GrammarSnippetBase(SphinxDirective):
4141
4242 # The option/argument handling is left to the individual classes.
4343
44+ grammar_re : Final = re .compile (
45+ r"""
46+ (?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
47+ (?=:) # ... followed by a colon
48+ |
49+ (?P<rule_ref>`[^\s`]+`) # identifier in backquotes
50+ |
51+ (?P<single_quoted>'[^']*') # string in 'quotes'
52+ |
53+ (?P<double_quoted>"[^"]*") # string in "quotes"
54+ """ ,
55+ re .VERBOSE ,
56+ )
57+
4458 def make_grammar_snippet (
4559 self , options : dict [str , Any ], content : Sequence [str ]
46- ) -> list [nodes . paragraph ]:
60+ ) -> list [addnodes . productionlist ]:
4761 """Create a production list node from options & content."""
4862
4963 group_name = options ['group' ]
50-
51- # Docutils elements have a `rawsource` attribute that is supposed to be
52- # set to the original ReST source.
53- # Sphinx does the following with it:
54- # - if it's empty, set it to `self.astext()`
55- # - if it matches `self.astext()` when generating the output,
56- # apply syntax highlighting (which is based on the plain-text content
57- # and thus discards internal formatting, like references).
58- # To get around this, we set it to this non-empty string:
59- rawsource = 'You should not see this.'
60-
61- literal = nodes .literal_block (
62- rawsource ,
64+ node_location = self .get_location ()
65+ production_nodes = []
66+ for rawsource , production_defs in self .production_definitions (content ):
67+ production = self .make_production (
68+ rawsource ,
69+ production_defs ,
70+ group_name = group_name ,
71+ location = node_location ,
72+ )
73+ production_nodes .append (production )
74+
75+ node = addnodes .productionlist (
6376 '' ,
77+ * production_nodes ,
78+ support_smartquotes = False ,
6479 classes = ['highlight' ],
6580 )
81+ self .set_source_info (node )
82+ return [node ]
6683
67- grammar_re = re .compile (
68- r"""
69- (?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
70- (?=:) # ... followed by a colon
71- |
72- (?P<rule_ref>`[^\s`]+`) # identifier in backquotes
73- |
74- (?P<single_quoted>'[^']*') # string in 'quotes'
75- |
76- (?P<double_quoted>"[^"]*") # string in "quotes"
77- """ ,
78- re .VERBOSE ,
79- )
80-
81- for line in content :
84+ def production_definitions (
85+ self , lines : Iterable [str ], /
86+ ) -> Iterator [tuple [str , list [tuple [str , str ]]]]:
87+ """Yield (rawsource, parts) pairs; parts is a list of (kind, text) tuples."""
88+ production_lines : list [str ] = []
89+ production_content : list [tuple [str , str ]] = []
90+ for line in lines :
91+ # If this line is the start of a new rule (text in column 1),
92+ # emit the current production and start a new one.
93+ if not line [:1 ].isspace ():
94+ rawsource = '\n ' .join (production_lines )
95+ production_lines .clear ()
96+ if production_content :
97+ yield rawsource , production_content
98+ production_content = []
99+
100+ # Append the current line for the raw source
101+ production_lines .append (line )
102+
103+ # Parse the line into constituent parts
82104 last_pos = 0
83- for match in grammar_re .finditer (line ):
105+ for match in self . grammar_re .finditer (line ):
84106 # Handle text between matches
85107 if match .start () > last_pos :
86- literal += nodes .Text (line [last_pos : match .start ()])
108+ unmatched_text = line [last_pos : match .start ()]
109+ production_content .append (('text' , unmatched_text ))
87110 last_pos = match .end ()
88111
89- # Handle matches
90- group_dict = {
91- name : content
92- for name , content in match .groupdict ().items ()
112+ # Handle matches.
113+ # After filtering None (non-matches), exactly one groupdict()
114+ # entry should remain.
115+ [(re_group_name , content )] = (
116+ (re_group_name , content )
117+ for re_group_name , content in match .groupdict ().items ()
93118 if content is not None
94- }
95- match group_dict :
96- case {'rule_name' : name }:
97- literal += self .make_link_target_for_token (
98- group_name , name
99- )
100- case {'rule_ref' : ref_text }:
101- literal += token_xrefs (ref_text , group_name )
102- case {'single_quoted' : name } | {'double_quoted' : name }:
103- literal += snippet_string_node ('' , name )
104- case _:
105- raise ValueError ('unhandled match' )
106- literal += nodes .Text (line [last_pos :] + '\n ' )
107-
108- node = nodes .paragraph (
109- '' ,
110- '' ,
111- literal ,
112- )
119+ )
120+ production_content .append ((re_group_name , content ))
121+ production_content .append (('text' , line [last_pos :] + '\n ' ))
113122
114- return [node ]
123+ # Emit the final production
124+ if production_content :
125+ rawsource = '\n ' .join (production_lines )
126+ yield rawsource , production_content
115127
116- def make_link_target_for_token (
117- self , group_name : str , name : str
128+ def make_production (
129+ self ,
130+ rawsource : str ,
131+ production_defs : list [tuple [str , str ]],
132+ * ,
133+ group_name : str ,
134+ location : str ,
135+ ) -> addnodes .production :
136+ """Create a production node from a list of parts."""
137+ production_node = addnodes .production (rawsource )
138+ for re_group_name , content in production_defs :
139+ match re_group_name :
140+ case 'rule_name' :
141+ production_node += self .make_name_target (
142+ name = content ,
143+ production_group = group_name ,
144+ location = location ,
145+ )
146+ case 'rule_ref' :
147+ production_node += token_xrefs (content , group_name )
148+ case 'single_quoted' | 'double_quoted' :
149+ production_node += snippet_string_node ('' , content )
150+ case 'text' :
151+ production_node += nodes .Text (content )
152+ case _:
153+ raise ValueError (f'unhandled match: { re_group_name !r} ' )
154+ return production_node
155+
156+ def make_name_target (
157+ self ,
158+ * ,
159+ name : str ,
160+ production_group : str ,
161+ location : str ,
118162 ) -> addnodes .literal_strong :
119- """Return a literal node which is a link target for the given token."""
120- name_node = addnodes .literal_strong ()
163+ """Make a link target for the given production."""
121164
122165 # Cargo-culted magic to make `name_node` a link target
123166 # similar to Sphinx `production`.
124167 # This needs to be the same as what Sphinx does
125168 # to avoid breaking existing links.
126- domain = self . env . domains [ 'std' ]
127- obj_name = f" { group_name } : { name } "
128- prefix = f'grammar-token-{ group_name } '
169+
170+ name_node = addnodes . literal_strong ( name , name )
171+ prefix = f'grammar-token-{ production_group } '
129172 node_id = make_id (self .env , self .state .document , prefix , name )
130173 name_node ['ids' ].append (node_id )
131174 self .state .document .note_implicit_target (name_node , name_node )
132- domain .note_object ('token' , obj_name , node_id , location = name_node )
133-
134- text_node = nodes .Text (name )
135- name_node += text_node
175+ obj_name = f'{ production_group } :{ name } ' if production_group else name
176+ std = self .env .domains .standard_domain
177+ std .note_object ('token' , obj_name , node_id , location = location )
136178 return name_node
137179
138180
@@ -168,7 +210,7 @@ class GrammarSnippetDirective(GrammarSnippetBase):
168210 optional_arguments = 1
169211 final_argument_whitespace = True
170212
171- def run (self ) -> list [nodes . paragraph ]:
213+ def run (self ) -> list [addnodes . productionlist ]:
172214 return self .make_grammar_snippet (self .options , self .content )
173215
174216
@@ -187,7 +229,7 @@ class CompatProductionList(GrammarSnippetBase):
187229 final_argument_whitespace = True
188230 option_spec = {}
189231
190- def run (self ) -> list [nodes . paragraph ]:
232+ def run (self ) -> list [addnodes . productionlist ]:
191233 # The "content" of a productionlist is actually the first and only
192234 # argument. The first line is the group; the rest is the content lines.
193235 lines = self .arguments [0 ].splitlines ()
0 commit comments