diff --git a/pyproject.toml b/pyproject.toml index 13e05bca..979a2e06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -254,7 +254,7 @@ ban-relative-imports = "all" # _libkiwix mimics libkiwix C++ code, names obey C++ conventions "src/zimscraperlib/zim/_libkiwix.py" = ["N802", "N803", "N806"] # beartype must be first -"src/zimscraperlib/zim/__init__.py" = ["E402"] +"src/zimscraperlib/__init__.py" = ["E402"] [tool.pytest.ini_options] minversion = "7.3" @@ -278,6 +278,7 @@ exclude_lines = [ "no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:", + "class .*Protocol.*", ] [tool.pyright] diff --git a/src/zimscraperlib/__init__.py b/src/zimscraperlib/__init__.py index 1ff4f461..20c9e55b 100644 --- a/src/zimscraperlib/__init__.py +++ b/src/zimscraperlib/__init__.py @@ -4,6 +4,10 @@ import logging as stdlogging import os +from beartype.claw import beartype_this_package + +beartype_this_package() + from zimscraperlib.constants import NAME from zimscraperlib.logging import getLogger diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py index ac868385..83ec167e 100644 --- a/src/zimscraperlib/download.py +++ b/src/zimscraperlib/download.py @@ -6,7 +6,7 @@ import pathlib import subprocess from concurrent.futures import Future, ThreadPoolExecutor -from typing import IO, ClassVar +from typing import ClassVar import requests import requests.adapters @@ -16,6 +16,7 @@ from zimscraperlib import logger from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT +from zimscraperlib.typing import SupportsSeekableWrite, SupportsWrite class YoutubeDownloader: @@ -59,11 +60,10 @@ def download( future = self.executor.submit(self._run_youtube_dl, url, options or {}) if not wait: return future - if not future.exception(): - # return the result - return future.result() # pyright: ignore - # raise the exception - raise future.exception() # pyright: ignore + exc = future.exception() + if isinstance(exc, BaseException): + raise exc + return True class YoutubeConfig(dict): @@ -176,7 +176,7 @@ def get_session(max_retries: int | None = 5) -> requests.Session: def stream_file( url: str, fpath: pathlib.Path | None = None, - byte_stream: IO[bytes] | None = None, + byte_stream: SupportsWrite[bytes] | SupportsSeekableWrite[bytes] | None = None, block_size: int | None = 1024, proxies: dict[str, str] | None = None, max_retries: int | None = 5, @@ -216,15 +216,16 @@ def stream_file( total_downloaded = 0 if fpath is not None: - fp = open(fpath, "wb") - elif ( - byte_stream is not None - ): # pragma: no branch (we use a precise condition to help type checker) - fp = byte_stream + fpath_handler = open(fpath, "wb") + else: + fpath_handler = None for data in resp.iter_content(block_size): total_downloaded += len(data) - fp.write(data) + if fpath_handler: + fpath_handler.write(data) + if byte_stream: + byte_stream.write(data) # stop downloading/reading if we're just testing first block if only_first_block: @@ -232,8 +233,8 @@ def stream_file( logger.debug(f"Downloaded {total_downloaded} bytes from {url}") - if fpath: - fp.close() - else: - fp.seek(0) + if fpath_handler: + fpath_handler.close() + elif isinstance(byte_stream, SupportsSeekableWrite) and byte_stream.seekable(): + byte_stream.seek(0) return total_downloaded, resp.headers diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py index 18ee0bb9..a442d81f 100644 --- a/src/zimscraperlib/i18n.py +++ b/src/zimscraperlib/i18n.py @@ -59,12 +59,12 @@ def iso_types(self) -> list[str]: return self["iso_types"] @property - def query(self) -> list[str]: + def query(self) -> str: """Query issued for these language details""" return self["query"] @property - def querytype(self) -> list[str]: + def querytype(self) -> str: """Type of query issued to retrieve language details""" return self["querytype"] diff --git a/src/zimscraperlib/image/conversion.py b/src/zimscraperlib/image/conversion.py index de53bea9..7506f48c 100644 --- a/src/zimscraperlib/image/conversion.py +++ b/src/zimscraperlib/image/conversion.py @@ -5,7 +5,6 @@ import io import pathlib -from typing import IO import cairosvg.svg from PIL.Image import open as pilopen @@ -17,9 +16,9 @@ def convert_image( - src: pathlib.Path | IO[bytes], - dst: pathlib.Path | IO[bytes], - **params: str, + src: pathlib.Path | io.BytesIO, + dst: pathlib.Path | io.BytesIO, + **params: str | None, ) -> None: """convert an image file from one format to another params: Image.save() parameters. Depends on dest format. @@ -31,7 +30,9 @@ def convert_image( to RGB. ex: RGB, ARGB, CMYK (and other PIL colorspaces)""" colorspace = params.get("colorspace") # requested colorspace - fmt = params.pop("fmt").upper() if "fmt" in params else None # requested format + fmt = ( + str(params.pop("fmt")).upper() if params.get("fmt") else None + ) # requested format if not fmt: fmt = format_for(dst) if not fmt: @@ -44,7 +45,7 @@ def convert_image( def convert_svg2png( src: str | pathlib.Path | io.BytesIO, - dst: pathlib.Path | IO[bytes], + dst: pathlib.Path | io.BytesIO, width: int | None = None, height: int | None = None, ): diff --git a/src/zimscraperlib/image/optimization.py b/src/zimscraperlib/image/optimization.py index e2817701..5b9c4305 100644 --- a/src/zimscraperlib/image/optimization.py +++ b/src/zimscraperlib/image/optimization.py @@ -210,14 +210,14 @@ def optimize_webp( else: try: save_image(webp_image, dst, fmt="WEBP", **params) - except Exception as exc: + except Exception as exc: # pragma: no cover if ( isinstance(src, pathlib.Path) and isinstance(dst, pathlib.Path) and src.resolve() != dst.resolve() and dst.exists() ): - dst.unlink() # pragma: no cover + dst.unlink() raise exc return dst diff --git a/src/zimscraperlib/image/probing.py b/src/zimscraperlib/image/probing.py index 06b89ae3..213425dd 100644 --- a/src/zimscraperlib/image/probing.py +++ b/src/zimscraperlib/image/probing.py @@ -7,7 +7,6 @@ import io import pathlib import re -from typing import IO import colorthief import PIL.Image @@ -55,7 +54,7 @@ def is_hex_color(text: str) -> bool: def format_for( - src: pathlib.Path | IO[bytes], + src: pathlib.Path | io.BytesIO, *, from_suffix: bool = True, ) -> str | None: @@ -95,7 +94,7 @@ def format_for( def is_valid_image( - image: pathlib.Path | IO[bytes] | bytes, + image: pathlib.Path | bytes | io.BytesIO, imformat: str, size: tuple[int, int] | None = None, ) -> bool: diff --git a/src/zimscraperlib/image/utils.py b/src/zimscraperlib/image/utils.py index af1ed57e..306b40b8 100644 --- a/src/zimscraperlib/image/utils.py +++ b/src/zimscraperlib/image/utils.py @@ -2,8 +2,9 @@ # vim: ai ts=4 sts=4 et sw=4 nu from __future__ import annotations +import io import pathlib -from typing import IO +from typing import IO, Any from PIL.Image import Image from PIL.ImageFile import ImageFile @@ -11,9 +12,9 @@ def save_image( src: Image | ImageFile, - dst: pathlib.Path | IO[bytes], + dst: pathlib.Path | IO[bytes] | io.BytesIO, fmt: str, - **params: str, + **params: Any, ) -> None: """PIL.Image.save() wrapper setting default parameters""" args = {"JPEG": {"quality": 100}, "PNG": {}}.get(fmt, {}) diff --git a/src/zimscraperlib/logging.py b/src/zimscraperlib/logging.py index c60a3b25..68937594 100644 --- a/src/zimscraperlib/logging.py +++ b/src/zimscraperlib/logging.py @@ -3,6 +3,7 @@ from __future__ import annotations +import io import logging import pathlib import sys @@ -22,7 +23,7 @@ def getLogger( # noqa: N802 (intentionally matches the stdlib getLogger name) name: str, level: int = logging.INFO, - console: TextIO | None = sys.stdout, + console: TextIO | io.StringIO | None = sys.stdout, log_format: str | None = DEFAULT_FORMAT, file: pathlib.Path | None = None, file_level: int | None = None, diff --git a/src/zimscraperlib/rewriting/css.py b/src/zimscraperlib/rewriting/css.py index c04cbcf0..db553f28 100644 --- a/src/zimscraperlib/rewriting/css.py +++ b/src/zimscraperlib/rewriting/css.py @@ -186,29 +186,27 @@ def _process_node(self, node: ast.Node): ) elif isinstance(node, ast.FunctionBlock): if node.lower_name == "url": # pyright: ignore[reportUnknownMemberType] - url_node: ast.Node = node.arguments[0] # pyright: ignore + url_node: ast.Node = node.arguments[0] new_url = self.url_rewriter( - url_node.value, # pyright: ignore + getattr(url_node, "value", ""), self.base_href, ).rewriten_url - url_node.value = str(new_url) # pyright: ignore - url_node.representation = ( # pyright: ignore - f'"{serialize_url(str(new_url))}"' + setattr(url_node, "value", str(new_url)) # noqa: B010 + setattr( # noqa: B010 + url_node, "representation", f'"{serialize_url(str(new_url))}"' ) else: self._process_list( - node.arguments, # pyright: ignore + getattr(node, "arguments", []), ) elif isinstance(node, ast.AtRule): - self._process_list(node.prelude) # pyright: ignore - self._process_list(node.content) # pyright: ignore + self._process_list(node.prelude) + self._process_list(node.content) elif isinstance(node, ast.Declaration): - self._process_list(node.value) # pyright: ignore + self._process_list(node.value) elif isinstance(node, ast.URLToken): - new_url = self.url_rewriter( - node.value, self.base_href - ).rewriten_url # pyright: ignore + new_url = self.url_rewriter(node.value, self.base_href).rewriten_url node.value = new_url node.representation = f"url({serialize_url(new_url)})" diff --git a/src/zimscraperlib/rewriting/html.py b/src/zimscraperlib/rewriting/html.py index d1933bbd..ea24b912 100644 --- a/src/zimscraperlib/rewriting/html.py +++ b/src/zimscraperlib/rewriting/html.py @@ -560,7 +560,7 @@ def rewrite_meta_charset_content( return if attr_name == "charset": return (attr_name, "UTF-8") - if attr_name == "content" and any( + if attr_name == "content" and any( # pragma: no coverage (coverage bug) attr_name.lower() == "http-equiv" and attr_value and attr_value.lower() == "content-type" @@ -574,7 +574,9 @@ def rewrite_onxxx_tags( attr_name: str, attr_value: str | None, js_rewriter: JsRewriter ) -> AttrNameAndValue | None: """Rewrite onxxx script attributes""" - if attr_value and attr_name.startswith("on") and not attr_name.startswith("on-"): + if ( + attr_value and attr_name.startswith("on") and not attr_name.startswith("on-") + ): # pragma: no coverage (coverage bug) return (attr_name, js_rewriter.rewrite(attr_value)) @@ -583,7 +585,7 @@ def rewrite_style_tags( attr_name: str, attr_value: str | None, css_rewriter: CssRewriter ) -> AttrNameAndValue | None: """Rewrite style attributes""" - if attr_value and attr_name == "style": + if attr_value and attr_name == "style": # pragma: no coverage (coverage bug) return (attr_name, css_rewriter.rewrite_inline(attr_value)) diff --git a/src/zimscraperlib/rewriting/url_rewriting.py b/src/zimscraperlib/rewriting/url_rewriting.py index d661acd1..4c76e19b 100644 --- a/src/zimscraperlib/rewriting/url_rewriting.py +++ b/src/zimscraperlib/rewriting/url_rewriting.py @@ -41,8 +41,9 @@ from __future__ import annotations import re +from dataclasses import dataclass from pathlib import PurePosixPath -from typing import ClassVar, NamedTuple +from typing import ClassVar from urllib.parse import quote, unquote, urljoin, urlsplit import idna @@ -51,7 +52,8 @@ from zimscraperlib.rewriting.rules import FUZZY_RULES -class AdditionalRule(NamedTuple): +@dataclass +class AdditionalRule: match: re.Pattern[str] replace: str @@ -147,7 +149,8 @@ def check_validity(cls, value: str) -> None: raise ValueError(f"Unexpected password in value: {value} {parts.password}") -class RewriteResult(NamedTuple): +@dataclass +class RewriteResult: absolute_url: str rewriten_url: str zim_path: ZimPath | None @@ -382,9 +385,6 @@ def normalize(cls, url: HttpUrl) -> ZimPath: passed to python-libzim for UTF-8 encoding. """ - if not isinstance(url, HttpUrl): - raise ValueError("Bad argument type passed, HttpUrl expected") - url_parts = urlsplit(url.value) if not url_parts.hostname: diff --git a/src/zimscraperlib/types.py b/src/zimscraperlib/types.py index 228b2103..35de13ac 100644 --- a/src/zimscraperlib/types.py +++ b/src/zimscraperlib/types.py @@ -47,13 +47,10 @@ def get_mime_for_name( MIME only guessed from file extension and not actual content. Filename with no extension are mapped to `no_ext_to`""" - try: - filename = pathlib.Path(filename) - if not filename.suffix: - return no_ext_to - return mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback - except Exception: - return fallback + filename = pathlib.Path(filename) + if not filename.suffix: + return no_ext_to + return mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback def init_types(): diff --git a/src/zimscraperlib/typing.py b/src/zimscraperlib/typing.py index 237f1ae7..ab14ebff 100644 --- a/src/zimscraperlib/typing.py +++ b/src/zimscraperlib/typing.py @@ -1,10 +1,15 @@ from __future__ import annotations from collections.abc import Callable -from typing import Any, NamedTuple +from dataclasses import dataclass +from typing import Any, Protocol, TypeVar, runtime_checkable +_T_co = TypeVar("_T_co", covariant=True) +_T_contra = TypeVar("_T_contra", contravariant=True) -class Callback(NamedTuple): + +@dataclass +class Callback: func: Callable args: tuple[Any, ...] | None = None kwargs: dict[str, Any] | None = None @@ -24,3 +29,34 @@ def call_with(self, *args, **kwargs): def call(self): self.call_with(*self.get_args(), **self.get_kwargs()) + + +@runtime_checkable +class SupportsWrite(Protocol[_T_contra]): + """Protocol exposing an expected write method""" + + def write(self, s: _T_contra, /) -> object: ... + + +@runtime_checkable +class SupportsRead(Protocol[_T_co]): + def read(self, length: int = ..., /) -> _T_co: ... + + +@runtime_checkable +class SupportsSeeking(Protocol): + def seekable(self) -> bool: ... + + def seek(self, target: int, whence: int = 0) -> int: ... + + def tell(self) -> int: ... + + def truncate(self, pos: int) -> int: ... + + +@runtime_checkable +class SupportsSeekableRead(SupportsRead[_T_co], SupportsSeeking, Protocol): ... + + +@runtime_checkable +class SupportsSeekableWrite(SupportsWrite[_T_contra], SupportsSeeking, Protocol): ... diff --git a/src/zimscraperlib/uri.py b/src/zimscraperlib/uri.py index 323b826d..6e60a186 100644 --- a/src/zimscraperlib/uri.py +++ b/src/zimscraperlib/uri.py @@ -4,7 +4,6 @@ import urllib.parse -from zimscraperlib import logger from zimscraperlib.misc import first @@ -19,39 +18,26 @@ def rebuild_uri( params: str | None = None, query: str | None = None, fragment: str | None = None, - *, - failsafe: bool = False, ) -> urllib.parse.ParseResult: """new ParseResult named tuple from uri with requested part updated""" - try: - username = first(username, uri.username) - password = first(password, uri.password) - hostname = first(hostname, uri.hostname) - port = first(port, uri.port) - netloc = ( - f"{username}{':' if password else ''}{password}" - f"{'@' if username or password else ''}{hostname}" - f"{':' if port else ''}{port}" - ) - return urllib.parse.urlparse( - urllib.parse.urlunparse( - ( - first(scheme, uri.scheme), - netloc, - first(path, uri.path), - first(params, uri.params), - first(query, uri.query), - first(fragment, uri.fragment), - ) + username = first(username, uri.username) + password = first(password, uri.password) + hostname = first(hostname, uri.hostname) + port = first(port, uri.port) + netloc = ( + f"{username}{':' if password else ''}{password}" + f"{'@' if username or password else ''}{hostname}" + f"{':' if port else ''}{port}" + ) + return urllib.parse.urlparse( + urllib.parse.urlunparse( + ( + first(scheme, uri.scheme), + netloc, + first(path, uri.path), + first(params, uri.params), + first(query, uri.query), + first(fragment, uri.fragment), ) ) - except Exception as exc: - if failsafe: - logger.error( - f"Failed to rebuild " # lgtm [py/clear-text-logging-sensitive-data] - f"URI {uri} with scheme={scheme} username={username} " - f"password={password} hostname={hostname} port={port} path={path} " - f"params={params} query={query} fragment={fragment} - {exc}" - ) - return uri - raise exc + ) diff --git a/src/zimscraperlib/video/config.py b/src/zimscraperlib/video/config.py index 15d8ecac..16658a8f 100644 --- a/src/zimscraperlib/video/config.py +++ b/src/zimscraperlib/video/config.py @@ -116,7 +116,7 @@ def buffersize(self, value): @property def video_scale(self): # remove "scale='" and "'" and return the value in between - return self.get("-vf")[7:-1] if self.get("-vf") else None # pyright: ignore + return self.get("-vf", [])[7:-1] if self.get("-vf") else None @video_scale.setter def video_scale(self, value): diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py index da74ab69..9d82c1ea 100644 --- a/src/zimscraperlib/zim/__init__.py +++ b/src/zimscraperlib/zim/__init__.py @@ -9,10 +9,7 @@ zim.items: item to add to creator zim.archive: read ZIM files, accessing or searching its content""" -from beartype.claw import beartype_this_package -from libzim.writer import Blob # pyright: ignore - -beartype_this_package() +from libzim.writer import Blob # pyright: ignore[reportMissingModuleSource] from zimscraperlib.zim.archive import Archive from zimscraperlib.zim.creator import Creator diff --git a/src/zimscraperlib/zim/archive.py b/src/zimscraperlib/zim/archive.py index 891fb976..1f4dda89 100644 --- a/src/zimscraperlib/zim/archive.py +++ b/src/zimscraperlib/zim/archive.py @@ -13,9 +13,9 @@ from collections.abc import Iterable -import libzim.reader # pyright: ignore -import libzim.search # Query, Searcher # pyright: ignore -import libzim.suggestion # SuggestionSearcher # pyright: ignore +import libzim.reader # pyright: ignore[reportMissingModuleSource] +import libzim.search # pyright: ignore[reportMissingModuleSource] +import libzim.suggestion # pyright: ignore[reportMissingModuleSource] from zimscraperlib.zim._libkiwix import CounterMap, convertTags, parseMimetypeCounter diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index e18e9e85..99fe6dee 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -25,7 +25,7 @@ import re import weakref -import libzim.writer # pyright: ignore +import libzim.writer # pyright: ignore[reportMissingModuleSource] import PIL.Image from zimscraperlib import logger diff --git a/src/zimscraperlib/zim/indexing.py b/src/zimscraperlib/zim/indexing.py index b1505456..807bfcf8 100644 --- a/src/zimscraperlib/zim/indexing.py +++ b/src/zimscraperlib/zim/indexing.py @@ -5,7 +5,7 @@ import io import pathlib -import libzim.writer # pyright: ignore +import libzim.writer # pyright: ignore[reportMissingModuleSource] try: import pymupdf diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py index e1f9e9b2..05221f47 100644 --- a/src/zimscraperlib/zim/items.py +++ b/src/zimscraperlib/zim/items.py @@ -13,7 +13,7 @@ from collections.abc import Callable from typing import Any -import libzim.writer # pyright: ignore +import libzim.writer # pyright: ignore[reportMissingModuleSource] from zimscraperlib.download import stream_file from zimscraperlib.filesystem import get_content_mimetype, get_file_mimetype diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py index b7e8b012..fdc9652a 100644 --- a/src/zimscraperlib/zim/metadata.py +++ b/src/zimscraperlib/zim/metadata.py @@ -19,6 +19,7 @@ from zimscraperlib.i18n import is_valid_iso_639_3 from zimscraperlib.image.probing import is_valid_image from zimscraperlib.inputs import unique_values +from zimscraperlib.typing import SupportsRead, SupportsSeekableRead # All control characters are disallowed in str metadata except \n, \r and \t UNWANTED_CONTROL_CHARACTERS_REGEX = regex.compile(r"(?![\n\t\r])\p{C}") @@ -194,7 +195,10 @@ def get_encoded(value: str) -> bytes: def libzim_value(self) -> bytes: return self.get_libzim_value() - def get_binary_from(self, value: bytes | io.IOBase | io.BytesIO) -> bytes: + def get_binary_from( + self, + value: bytes | SupportsRead[bytes] | SupportsSeekableRead[bytes] | io.BytesIO, + ) -> bytes: bvalue: bytes = b"" if isinstance(value, io.BytesIO): bvalue = value.getvalue() @@ -202,10 +206,10 @@ def get_binary_from(self, value: bytes | io.IOBase | io.BytesIO) -> bytes: bvalue = value else: last_pos: int - if value.seekable(): + if isinstance(value, SupportsSeekableRead) and value.seekable(): last_pos = value.tell() bvalue = value.read() - if value.seekable(): + if isinstance(value, SupportsSeekableRead) and value.seekable(): value.seek(last_pos) if not self.empty_allowed and not value: raise ValueError("Missing value (empty not allowed)") diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py index a4748cbb..4a7ddec8 100644 --- a/src/zimscraperlib/zim/providers.py +++ b/src/zimscraperlib/zim/providers.py @@ -15,7 +15,7 @@ import pathlib from collections.abc import Generator -import libzim.writer # pyright: ignore +import libzim.writer # pyright: ignore[reportMissingModuleSource] import requests from zimscraperlib.download import _get_retry_adapter, stream_file @@ -60,7 +60,7 @@ def __init__( self.fileobj.seek(0, io.SEEK_SET) def get_size(self) -> int: - return self.size # pyright: ignore + return getattr(self, "size", -1) def gen_blob(self) -> Generator[libzim.writer.Blob, None, None]: yield libzim.writer.Blob(self.fileobj.getvalue()) # pragma: no cover @@ -91,7 +91,7 @@ def get_size_of(url) -> int | None: return None def get_size(self) -> int: - return self.size # pyright: ignore + return getattr(self, "size", -1) def gen_blob(self) -> Generator[libzim.writer.Blob, None, None]: # pragma: no cover for chunk in self.resp.iter_content(10 * 1024): diff --git a/tasks.py b/tasks.py index 87cd5529..a7ca16ab 100644 --- a/tasks.py +++ b/tasks.py @@ -2,7 +2,7 @@ import os from invoke.context import Context -from invoke.tasks import task # pyright: ignore [reportUnknownVariableType] +from invoke.tasks import task # pyright: ignore[reportUnknownVariableType] use_pty = not os.getenv("CI", "") diff --git a/tests/conftest.py b/tests/conftest.py index a915826f..15e93ecf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -167,7 +167,7 @@ def valid_user_agent(): def small_zim_file(tmpdir_factory): from zimscraperlib.download import stream_file - dst = tmpdir_factory.mktemp("data").join("small.zim") + dst = pathlib.Path(tmpdir_factory.mktemp("data").join("small.zim")) stream_file( "https://github.com/openzim/zim-testing-suite/raw/v0.3/data/nons/small.zim", dst, @@ -179,7 +179,7 @@ def small_zim_file(tmpdir_factory): def ns_zim_file(tmpdir_factory): from zimscraperlib.download import stream_file - dst = tmpdir_factory.mktemp("data").join("ns.zim") + dst = pathlib.Path(tmpdir_factory.mktemp("data").join("ns.zim")) stream_file( "https://github.com/openzim/zim-testing-suite/raw/v0.4/data/withns/" "wikibooks_be_all_nopic_2017-02.zim", @@ -192,7 +192,7 @@ def ns_zim_file(tmpdir_factory): def real_zim_file(tmpdir_factory): from zimscraperlib.download import stream_file - dst = tmpdir_factory.mktemp("data").join("small.zim") + dst = pathlib.Path(tmpdir_factory.mktemp("data").join("small.zim")) stream_file( "https://github.com/openzim/zim-testing-suite/raw/v0.3/data/withns/" "wikipedia_en_climate_change_nopic_2020-01.zim", diff --git a/tests/download/test_download.py b/tests/download/test_download.py index 75f23698..b450a7a7 100644 --- a/tests/download/test_download.py +++ b/tests/download/test_download.py @@ -71,6 +71,29 @@ def test_first_block_download_default_session(valid_http_url): assert len(byte_stream.read()) <= expected +def test_filehandler(tmp_path, valid_http_url): + dest_file = pathlib.Path(tmp_path / "favicon.ico") + + def notseekable(): + return False + + with open(dest_file, "wb") as byte_stream: + assert byte_stream.seekable() + size, ret = stream_file( + url=valid_http_url, byte_stream=byte_stream, only_first_block=True + ) + assert_headers(ret) + assert byte_stream.tell() == 0 + + byte_stream.seekable = notseekable + assert not byte_stream.seekable() + size, ret = stream_file( + url=valid_http_url, byte_stream=byte_stream, only_first_block=True + ) + assert_headers(ret) + assert byte_stream.tell() > 0 + + def test_first_block_download_custom_session(mocker, valid_http_url): byte_stream = io.BytesIO() custom_session = mocker.Mock(spec=requests.Session) @@ -95,7 +118,7 @@ def test_first_block_download_custom_session(mocker, valid_http_url): headers=None, timeout=DEFAULT_WEB_REQUESTS_TIMEOUT, ) - requests.Session.assert_not_called() # pyright: ignore + requests.Session.assert_not_called() # pyright: ignore[reportAttributeAccessIssue] @pytest.mark.slow @@ -137,6 +160,17 @@ def test_stream_to_bytes(valid_https_url): ) +@pytest.mark.slow +def test_unseekable_stream(valid_https_url): + def notseekable(): + return False + + byte_stream = io.BytesIO() + byte_stream.seekable = notseekable + size, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) + assert_headers(ret) + + @pytest.mark.slow def test_save_parent_folder_missing(tmp_path, valid_http_url): dest_file = tmp_path / "some-folder" / "favicon.ico" @@ -192,12 +226,13 @@ def test_youtube_download_nowait(tmp_path): BestMp4.get_options(target_dir=tmp_path), wait=False, ) - assert future.running() # pyright: ignore + assert future.running() # pyright: ignore[reportAttributeAccessIssue] assert not yt_downloader.executor._shutdown done, not_done = concurrent.futures.wait( - [future], return_when=concurrent.futures.ALL_COMPLETED # pyright: ignore + [future], # pyright: ignore[reportArgumentType] + return_when=concurrent.futures.ALL_COMPLETED, ) - assert future.exception() is None # pyright: ignore + assert future.exception() is None # pyright: ignore[reportAttributeAccessIssue] assert len(done) == 1 assert len(not_done) == 0 diff --git a/tests/image/test_image.py b/tests/image/test_image.py index 0441ebb0..507057bd 100644 --- a/tests/image/test_image.py +++ b/tests/image/test_image.py @@ -9,7 +9,6 @@ import pathlib import re import shutil -from subprocess import CalledProcessError import piexif import pytest @@ -29,7 +28,6 @@ optimize_image, optimize_jpeg, optimize_png, - optimize_webp, ) from zimscraperlib.image.presets import ( GifHigh, @@ -701,25 +699,6 @@ def test_format_for_cannot_use_suffix_with_byte_array(): assert format_for(src=io.BytesIO(), from_suffix=True) -def test_optimize_webp_gif_failure(tmp_path, webp_image, gif_image): - dst = tmp_path.joinpath("image.img") - - # webp - with pytest.raises(TypeError): - optimize_webp( - webp_image, dst, lossless="bad" # pyright: ignore[reportArgumentType] - ) - assert not dst.exists() - - # gif - dst.touch() # fake temp file created during optim (actually fails before) - with pytest.raises(CalledProcessError): - optimize_gif( - gif_image, dst, optimize_level="bad" # pyright: ignore[reportArgumentType] - ) - assert not dst.exists() - - def test_wrong_extension_optim(tmp_path, png_image): dst = tmp_path.joinpath("image.jpg") shutil.copy(png_image, dst) @@ -734,7 +713,6 @@ def test_is_valid_image(png_image, png_image2, jpg_image, font): assert is_valid_image(png_image, "PNG", (48, 48)) assert not is_valid_image(png_image2, "PNG", (48, 48)) assert not is_valid_image(b"", "PNG") - assert not is_valid_image(34, "PNG") # pyright: ignore[reportArgumentType] assert not is_valid_image(font, "PNG") with open(png_image, "rb") as fh: assert is_valid_image(fh.read(), "PNG", (48, 48)) diff --git a/tests/rewriting/test_url_rewriting.py b/tests/rewriting/test_url_rewriting.py index fcc80bc8..54775134 100644 --- a/tests/rewriting/test_url_rewriting.py +++ b/tests/rewriting/test_url_rewriting.py @@ -107,16 +107,6 @@ def test_normalize(self, url, zim_path): ArticleUrlRewriter.normalize(HttpUrl(url)).value == ZimPath(zim_path).value ) - def test_normalize_bad_arg( - self, - ): - with pytest.raises( - ValueError, match="Bad argument type passed, HttpUrl expected" - ): - ArticleUrlRewriter.normalize( - "https://www.acme.com" # pyright: ignore[reportArgumentType] - ) - class TestArticleUrlRewriter: @pytest.mark.parametrize( @@ -1019,7 +1009,7 @@ def test_http_urls_hash(self): def test_http_urls_str(self): assert str(HttpUrl("http://bob@acme.com")) == "HttpUrl(http://bob@acme.com)" - assert f"{HttpUrl("http://bob@acme.com")}" == "HttpUrl(http://bob@acme.com)" + assert f'{HttpUrl("http://bob@acme.com")}' == "HttpUrl(http://bob@acme.com)" def test_bad_http_urls_no_host(self): with pytest.raises(ValueError, match="Unsupported empty hostname in value"): @@ -1099,4 +1089,4 @@ def test_zim_path_hash(self): def test_zim_path_str(self): assert str(ZimPath("content/index.html")) == "ZimPath(content/index.html)" - assert f"{ZimPath("content/index.html")}" == "ZimPath(content/index.html)" + assert f'{ZimPath("content/index.html")}' == "ZimPath(content/index.html)" diff --git a/tests/types/test_types.py b/tests/types/test_types.py index 2ee602d1..2f26e97f 100644 --- a/tests/types/test_types.py +++ b/tests/types/test_types.py @@ -18,8 +18,6 @@ def test_constants(): [ ("hello.html", "text/html", None, None), ("some picture.png", "image/png", None, None), - # make sure we get default fallback on error - (b"-", "application/octet-stream", None, None), # make sure fallback is not returned on success ("hello.html", "text/html", "text/plain", None), # make sure fallback is returned on missing diff --git a/tests/zim/test_typing.py b/tests/typing/test_typing.py similarity index 100% rename from tests/zim/test_typing.py rename to tests/typing/test_typing.py diff --git a/tests/uri/test_uri.py b/tests/uri/test_uri.py index 494dce6e..91493e96 100644 --- a/tests/uri/test_uri.py +++ b/tests/uri/test_uri.py @@ -52,25 +52,3 @@ ) def test_rebuild_uri(uri, changes, expected): assert rebuild_uri(urllib.parse.urlparse(uri), **changes).geturl() == expected - - -@pytest.mark.parametrize( - "uri,changes", - [ - ("http://localhost", {"scheme": 123}), - ], -) -def test_rebuild_uri_failure(uri, changes): - with pytest.raises(TypeError): - rebuild_uri(urllib.parse.urlparse(uri), **changes) - - -@pytest.mark.parametrize( - "uri,changes", - [ - ("http://localhost", {"scheme": 123}), - ], -) -def test_rebuild_uri_failsafe(uri, changes): - puri = urllib.parse.urlparse(uri) - assert rebuild_uri(puri, **changes, failsafe=True) == puri diff --git a/tests/video/test_video.py b/tests/video/test_video.py index ad822c5a..61271264 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -84,7 +84,9 @@ def test_config_build_from(): assert idx != -1 assert args[idx + 1] == str(getattr(config, attr)) video_scale = config.video_scale - qmin, qmax = config.quantizer_scale_range # pyright: ignore + qmin, qmax = ( + config.quantizer_scale_range # pyright: ignore[reportGeneralTypeIssues] + ) assert args.index("-qmin") != -1 and args[args.index("-qmin") + 1] == str(qmin) assert args.index("-qmax") != -1 and args[args.index("-qmax") + 1] == str(qmax) assert ( diff --git a/tests/zim/test_indexing.py b/tests/zim/test_indexing.py index 63f6c05b..9cc036c5 100644 --- a/tests/zim/test_indexing.py +++ b/tests/zim/test_indexing.py @@ -1,7 +1,7 @@ import io import pathlib -import libzim.writer # pyright: ignore +import libzim.writer # pyright: ignore[reportMissingModuleSource] import pytest from zimscraperlib.zim import Archive, Creator diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py index 434d5b11..b05b1f3e 100644 --- a/tests/zim/test_metadata.py +++ b/tests/zim/test_metadata.py @@ -118,7 +118,7 @@ def test_validate_tags_invalid( value: list[str] | str | int, exception: type, error: str ): with pytest.raises(exception, match=re.escape(error)): - metadata.TagsMetadata(value) # pyright: ignore [reportArgumentType] + metadata.TagsMetadata(value) # pyright: ignore[reportArgumentType] def test_validate_dedup_tags(): diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index 78c324ea..36380374 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -17,7 +17,7 @@ from unittest.mock import call, patch import pytest -from libzim.writer import Compression # pyright: ignore +from libzim.writer import Compression # pyright: ignore[reportMissingModuleSource] from zimscraperlib.constants import UTF8 from zimscraperlib.download import save_large_file, stream_file