File tree Expand file tree Collapse file tree 3 files changed +42
-1
lines changed Expand file tree Collapse file tree 3 files changed +42
-1
lines changed Original file line number Diff line number Diff line change @@ -10,6 +10,12 @@ and this project adheres to the
1010[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).
1111See the [Contributing Guide](contributing.md) for details.
1212
13+ ## [ Unreleased]
14+
15+ ### Fixed
16+
17+ * Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554).
18+
1319## [ 3.9.0] - 2025-09-04
1420
1521### Changed
Original file line number Diff line number Diff line change 3333if TYPE_CHECKING : # pragma: no cover
3434 from markdown import Markdown
3535
36+ # Local copies of the comment-terminator patterns, included for versions which do not have the current comment fix.
37+ commentclose = re .compile (r'--!?>' )  # regular comment end: `-->` or `--!>`
38+ commentabruptclose = re .compile (r'-?>' )  # abrupt end: `>` or `->`
3639
3740# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
3841# Users can still do `from html import parser` and get the default behavior.
@@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:
302305 self .handle_data ('<?' )
303306 return i + 2
304307
if not hasattr(htmlparser, 'commentabruptclose'):
    # Only defined when `htmlparser` has no `commentabruptclose` attribute.
    # See https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
    def parse_comment(self, i, report=True):
        """Parse the comment that starts at index `i` of `self.rawdata`.

        Internal helper. Returns the index just past the comment terminator,
        or -1 if the comment is not terminated. When `report` is true, the
        text between `<!--` and the terminator is passed to
        `self.handle_comment`.
        """
        text = self.rawdata
        assert text.startswith('<!--', i), 'unexpected call to parse_comment()'
        body_start = i + 4  # first character after the `<!--` opener
        # Look for a regular terminator anywhere after the opener; failing
        # that, accept an abrupt terminator located exactly at `body_start`.
        closer = (
            commentclose.search(text, body_start)
            or commentabruptclose.match(text, body_start)
        )
        if closer is None:
            return -1
        if report:
            self.handle_comment(text[body_start:closer.start()])
        return closer.end()
323+
305324 def parse_html_declaration (self , i : int ) -> int :
306325 if self .at_line_start () or self .intail :
307326 if self .rawdata [i :i + 3 ] == '<![' and not self .rawdata [i :i + 9 ] == '<![CDATA[' :
Original file line number Diff line number Diff line change @@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):
10181018 # Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner
10191019 # as browsers and all text after the opening comment tag was considered to be in a comment. However,
10201020 # that did not match the reference implementation. The new behavior does.
1021- def test_unclosed_comment_ (self ):
1021+ def test_unclosed_comment (self ):
10221022 self .assertMarkdownRenders (
10231023 self .dedent (
10241024 """
@@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):
10351035 )
10361036 )
10371037
# The input comment ends with `-- >` (a space before `>`). The expected output
# keeps both input lines together inside a single `<p>` element.
# NOTE(review): the expected string below shows a raw `<!--` and `>` inside `<p>`;
# this may be `&lt;` / `&gt;` entities decoded by the page extraction -- confirm
# against the upstream test before relying on it.
1038+ def test_invalid_comment_end (self ):
1039+ self .assertMarkdownRenders (
1040+ self .dedent (
1041+ """
1042+ <!-- This comment is malformed and never closes -- >
1043+ Some content after the bad comment.
1044+ """
1045+ ),
1046+ self .dedent (
1047+ """
1048+ <p><!-- This comment is malformed and never closes -- >
1049+ Some content after the bad comment.</p>
1050+ """
1051+ )
1052+ )
1053+
10381054 def test_raw_processing_instruction_one_line (self ):
10391055 self .assertMarkdownRenders (
10401056 "<?php echo '>'; ?>" ,
You can’t perform that action at this time.
0 commit comments