Skip to content

Commit 9f8bc1f

Browse files
committed
Add access to base_href and url_rewriter in tag rewriting
1 parent 0cfd96b commit 9f8bc1f

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

src/zimscraperlib/rewriting/html.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,11 @@ def handle_starttag(self, tag: str, attrs: AttrsList, *, auto_close: bool = Fals
178178

179179
if (
180180
rewritten := rules.do_tag_rewrite(
181-
tag=tag, attrs=attrs, auto_close=auto_close
181+
tag=tag,
182+
attrs=attrs,
183+
url_rewriter=self.url_rewriter,
184+
base_href=self.base_href,
185+
auto_close=auto_close,
182186
)
183187
) is not None:
184188
self.send(rewritten)
@@ -468,6 +472,8 @@ def do_tag_rewrite(
468472
self,
469473
tag: str,
470474
attrs: AttrsList,
475+
url_rewriter: ArticleUrlRewriter,
476+
base_href: str | None,
471477
*,
472478
auto_close: bool,
473479
) -> str | None:
@@ -484,6 +490,8 @@ def do_tag_rewrite(
484490
for arg_name, arg_value in { # pyright: ignore[reportUnknownVariableType]
485491
"tag": tag,
486492
"attrs": attrs,
493+
"url_rewriter": url_rewriter,
494+
"base_href": base_href,
487495
"auto_close": auto_close,
488496
}.items()
489497
if arg_name in _cached_signature(rule.func).parameters

tests/rewriting/test_html_rewriting.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1269,6 +1269,20 @@ def rewrite2_tag(
12691269
)
12701270

12711271

1272+
@rules.rewrite_tag()
1273+
def rewrite3_tag(
1274+
tag: str,
1275+
base_href: str | None,
1276+
url_rewriter: ArticleUrlRewriter,
1277+
) -> str | None:
1278+
if tag != "rewrite3":
1279+
return
1280+
rewriten_url = url_rewriter(
1281+
"https://www.acme.com/foo.img", base_href=base_href
1282+
).rewriten_url
1283+
return f'<rewriten src="{rewriten_url}" />'
1284+
1285+
12721286
@pytest.mark.parametrize(
12731287
"tag, attrs, auto_close, expected_result",
12741288
[
@@ -1300,6 +1314,13 @@ def rewrite2_tag(
13001314
'<rewriten attr2="value2"/>',
13011315
id="rewrite2_tag_auto_close",
13021316
),
1317+
pytest.param(
1318+
"rewrite3",
1319+
[("attr2", "value2")],
1320+
True,
1321+
'<rewriten src="https://www.acme.com/foo.img?queryparam" />',
1322+
id="rewrite3_use_url_rewriter",
1323+
),
13031324
],
13041325
)
13051326
def test_html_tag_rewrite_rules(
@@ -1308,11 +1329,16 @@ def test_html_tag_rewrite_rules(
13081329
*,
13091330
auto_close: bool,
13101331
expected_result: str | None,
1332+
simple_url_rewriter_gen: Callable[[str, str], ArticleUrlRewriter],
13111333
):
13121334
assert (
13131335
rules.do_tag_rewrite(
13141336
tag=tag,
13151337
attrs=attrs,
1338+
url_rewriter=simple_url_rewriter_gen(
1339+
"http://www.example.com", "?queryparam"
1340+
),
1341+
base_href=None,
13161342
auto_close=auto_close,
13171343
)
13181344
== expected_result

0 commit comments

Comments
 (0)