1414import re
1515import sys
1616
17- from unittest import mock
17+ from html . parser import HTMLParser
1818from typing import Any , IO
19+ from unittest import mock
1920
2021import pytest
2122
@@ -94,6 +95,12 @@ def get_html_index_content(self) -> str:
9495 )
9596 return index
9697
def get_html_report_text_lines(self, module: str) -> list[str]:
    """Return the text rendered by the HTML report for `module`, as a list of strings.

    The report content is read with `get_html_report_content` and run through
    an `HtmlReportParser`; the result is whatever that parser's `text()` gives.
    """
    report_html = self.get_html_report_content(module)
    report_parser = HtmlReportParser()
    report_parser.feed(report_html)
    return report_parser.text()

103+
97104 def assert_correct_timestamp (self , html : str ) -> None :
98105 """Extract the time stamp from `html`, and assert it is recent."""
99106 timestamp_pat = r"created at (\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2})"
@@ -133,6 +140,43 @@ def assert_valid_hrefs(self, directory: str = "htmlcov") -> None:
133140 )
134141
135142
class HtmlReportParser(HTMLParser):     # pylint: disable=abstract-method
    """An HTML parser for our HTML reports.

    Text found inside the `<main id="source">` element is collected and split
    into lines: every `<a>` start tag seen inside `<main>` begins a new line.
    Text inside `<main>` that precedes the first such `<a>` is discarded.

    Assertions are made about the structure we expect.
    """
    def __init__(self) -> None:
        super().__init__()
        # One list of text fragments per line, in the order the lines appear.
        self.lines: list[list[str]] = []
        # True while the parser is between <main> and </main>.
        self.in_source = False

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Track entry into `<main>` and start a new line at each `<a>` inside it."""
        if tag == "main":
            assert attrs == [("id", "source")]
            self.in_source = True
        elif self.in_source and tag == "a":
            # A missing id and a value-less id both come back as None here.
            anchor_id = dict(attrs).get("id")
            assert anchor_id is not None
            # startswith() rather than anchor_id[0]: an empty id must fail the
            # assertion, not raise IndexError.
            assert anchor_id.startswith("t")
            line_no = int(anchor_id[1:])
            self.lines.append([])
            # Anchors must be numbered 1, 2, 3, ... with no gaps or repeats.
            assert line_no == len(self.lines)

    def handle_endtag(self, tag: str) -> None:
        """Stop collecting text once `</main>` is reached."""
        if tag == "main":
            self.in_source = False

    def handle_data(self, data: str) -> None:
        """Append text to the current line, if we are inside `<main>` and a line has begun."""
        if self.in_source and self.lines:
            self.lines[-1].append(data)

    def text(self) -> list[str]:
        """Get the rendered text as a list of strings, one per line."""
        return ["".join(fragments).rstrip() for fragments in self.lines]

179+
136180class FileWriteTracker :
137181 """A fake object to track how `open` is used to write files."""
138182 def __init__ (self , written : set [str ]) -> None :
@@ -1141,10 +1185,10 @@ def test_bug_1828(self) -> None:
11411185
11421186 cov = coverage .Coverage ()
11431187 backslashes = self .start_import_stop (cov , "backslashes" )
1144- cov .html_report (backslashes , directory = "out" )
1188+ cov .html_report (backslashes )
11451189
11461190 contains (
1147- "out /backslashes_py.html" ,
1191+ "htmlcov /backslashes_py.html" ,
11481192 # line 2 is `"bbb \`
11491193 r'<a id="t2" href="#t2">2</a></span>'
11501194 + r'<span class="t"> <span class="str">"bbb \</span>' ,
@@ -1153,6 +1197,12 @@ def test_bug_1828(self) -> None:
11531197 + r'<span class="t"><span class="str"> ccc"</span><span class="op">]</span>' ,
11541198 )
11551199
1200+ assert self .get_html_report_text_lines ("backslashes.py" ) == [
1201+ '1a = ["aaa",\\ ' ,
1202+ '2 "bbb \\ ' ,
1203+ '3 ccc"]' ,
1204+ ]
1205+
11561206 def test_unicode (self ) -> None :
11571207 surrogate = "\U000e0100 "
11581208
0 commit comments