@@ -147,6 +147,12 @@ def check_validity(cls, value: str) -> None:
147147 raise ValueError (f"Unexpected password in value: { value } { parts .password } " )
148148
149149
# Outcome of rewriting a single URL found in an article; this is the value
# returned by ArticleUrlRewriter.__call__.
150+ class RewriteResult (NamedTuple ):
151+ absolute_url : str  # item URL joined onto the article URL and base href
# NOTE(review): "rewriten" is a misspelling of "rewritten"; callers build this
# tuple by keyword, so a rename must be applied to every call site at once.
152+ rewriten_url : str  # URL to put in the document (may be the untouched input URL)
# None when the input URL is returned untouched (standalone fragment,
# non-http(s) scheme, or a rewrite error); otherwise the item's ZIM path.
153+ zim_path : ZimPath | None
154+
155+
150156class ArticleUrlRewriter :
151157 """
152158 Rewrite urls in article.
@@ -176,16 +182,11 @@ def __init__(
176182 missing_zim_paths: list of ZIM paths which are known to already be missing
177183 from the existing_zim_paths ; useful only in complement with this variable ;
178184 new missing entries will be added as URLs are normalized in this function
179-
180- Results:
181- items_to_download: populated with the list of rewritten URLs, so that one
182- might use it to download items after rewriting the document
183185 """
184186 self .article_path = article_path or ArticleUrlRewriter .normalize (article_url )
185187 self .article_url = article_url
186188 self .existing_zim_paths = existing_zim_paths
187189 self .missing_zim_paths = missing_zim_paths
188- self .items_to_download : dict [ZimPath , HttpUrl ] = {}
189190
190191 def get_item_path (self , item_url : str , base_href : str | None ) -> ZimPath :
191192 """Utility to transform an item URL into a ZimPath"""
@@ -201,7 +202,7 @@ def __call__(
201202 base_href : str | None ,
202203 * ,
203204 rewrite_all_url : bool = True ,
204- ) -> str :
205+ ) -> RewriteResult :
205206 """Rewrite a url contained in an article.
206207
207208 The url is "fully" rewritten to point to a normalized entry path
@@ -210,17 +211,25 @@ def __call__(
210211 try :
211212 item_url = item_url .strip ()
212213
214+ item_absolute_url = urljoin (
215+ urljoin (self .article_url .value , base_href ), item_url
216+ )
217+
213218 # Make case of standalone fragments more straightforward
214219 if item_url .startswith ("#" ):
215- return item_url
220+ return RewriteResult (
221+ absolute_url = item_absolute_url ,
222+ rewriten_url = item_url ,
223+ zim_path = None ,
224+ )
216225
217226 item_scheme = urlsplit (item_url ).scheme
218227 if item_scheme and item_scheme not in ("http" , "https" ):
219- return item_url
220-
221- item_absolute_url = urljoin (
222- urljoin ( self . article_url . value , base_href ), item_url
223- )
228+ return RewriteResult (
229+ absolute_url = item_absolute_url ,
230+ rewriten_url = item_url ,
231+ zim_path = None ,
232+ )
224233
225234 item_fragment = urlsplit (item_absolute_url ).fragment
226235
@@ -229,9 +238,11 @@ def __call__(
229238 if rewrite_all_url or (
230239 self .existing_zim_paths and item_path in self .existing_zim_paths
231240 ):
232- if item_path not in self .items_to_download :
233- self .items_to_download [item_path ] = HttpUrl (item_absolute_url )
234- return self .get_document_uri (item_path , item_fragment )
241+ return RewriteResult (
242+ absolute_url = item_absolute_url ,
243+ rewriten_url = self .get_document_uri (item_path , item_fragment ),
244+ zim_path = item_path ,
245+ )
235246 else :
236247 if (
237248 self .missing_zim_paths is not None
@@ -242,7 +253,11 @@ def __call__(
242253 # with duplicate messages
243254 self .missing_zim_paths .add (item_path )
244255 # The url doesn't point to a known entry
245- return item_absolute_url
256+ return RewriteResult (
257+ absolute_url = item_absolute_url ,
258+ rewriten_url = item_absolute_url ,
259+ zim_path = item_path ,
260+ )
246261
247262 except Exception as exc : # pragma: no cover
248263 item_scheme = (
@@ -275,7 +290,11 @@ def __call__(
275290 f"rewrite_all_url: { rewrite_all_url } " ,
276291 exc_info = exc ,
277292 )
278- return item_url
293+ return RewriteResult (
294+ absolute_url = item_absolute_url ,
295+ rewriten_url = item_url ,
296+ zim_path = None ,
297+ )
279298
280299 def get_document_uri (self , item_path : ZimPath , item_fragment : str ) -> str :
281300 """Given an ZIM item path and its fragment, get the URI to use in document
0 commit comments