From 6919885328199a5ccedbc1f1348f3d2fbda0f9de Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 02:00:21 +0100 Subject: [PATCH 1/8] Enable checking against mypy with minimal set of fixes 4 significant checks are disabled: * `strict_optional` - check that an object might be `None` on access * `warn_no_return` - requiring a `return` * `assignment` - any assignment of a variable of a wrong type * `var-annotated` - a type annotation is missing --- .github/workflows/tox.yml | 2 +- markdown/blockprocessors.py | 2 +- markdown/core.py | 6 ++-- markdown/extensions/abbr.py | 2 +- markdown/extensions/admonition.py | 2 +- markdown/extensions/attr_list.py | 2 +- markdown/extensions/codehilite.py | 2 +- markdown/extensions/fenced_code.py | 2 +- markdown/extensions/footnotes.py | 4 +-- markdown/extensions/md_in_html.py | 2 +- markdown/extensions/meta.py | 2 +- markdown/extensions/smarty.py | 9 +++-- markdown/extensions/toc.py | 6 ++-- markdown/extensions/wikilinks.py | 1 + markdown/htmlparser.py | 6 ++-- markdown/inlinepatterns.py | 53 +++++++++++++++++++----------- markdown/postprocessors.py | 3 +- markdown/serializers.py | 4 +-- markdown/test_tools.py | 2 +- markdown/treeprocessors.py | 27 ++++++++------- markdown/util.py | 4 +-- pyproject.toml | 8 +++++ tox.ini | 11 ++++++- 23 files changed, 99 insertions(+), 63 deletions(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8f45e22e6..f80c0c6eb 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -71,7 +71,7 @@ jobs: fail-fast: false max-parallel: 4 matrix: - tox-env: [flake8, pep517check, checkspelling] + tox-env: [mypy, flake8, pep517check, checkspelling] env: TOXENV: ${{ matrix.tox-env }} diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d2020b9b6..723ef7884 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -175,7 +175,7 @@ def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or - (len(parent) and parent[-1] is not None and + (len(parent) > 0 and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES))) def run(self, parent: etree.Element, blocks: list[str]) -> None: diff --git a/markdown/core.py b/markdown/core.py index 6c7a21be9..8c8305b2b 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -85,7 +85,7 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Creates a new Markdown instance. @@ -445,8 +445,8 @@ def convertFile( # Don't close here. User may want to write more. else: # Encode manually and write bytes to stdout. - html = html.encode(encoding, "xmlcharrefreplace") - sys.stdout.buffer.write(html) + html_bytes = html.encode(encoding, "xmlcharrefreplace") + sys.stdout.buffer.write(html_bytes) return self diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 738368afe..179768e75 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -94,7 +94,7 @@ def __init__(self, pattern: str, title: str): super().__init__(pattern) self.title = title - def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # type: ignore[override] abbr = etree.Element('abbr') abbr.text = AtomicString(m.group('abbr')) abbr.set('title', self.title) diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py index d0e97002d..01c2316d2 100644 --- a/markdown/extensions/admonition.py +++ b/markdown/extensions/admonition.py @@ -59,7 +59,7 @@ def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.current_sibling: etree.Element | None = None - self.content_indention = 0 + self.content_indent = 0 def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]: """Get sibling admonition. diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 7ce3f9925..e1d4b0fe9 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -56,7 +56,7 @@ def _handle_word(s, t): return t, t -_scanner = re.Scanner([ +_scanner = re.Scanner([ # type: ignore[attr-defined] (r'[^ =]+=".*?"', _handle_double_quote), (r"[^ =]+='.*?'", _handle_single_quote), (r'[^ =]+=[^ =]+', _handle_key_value), diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index 0114908f6..d58ea1531 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -161,7 +161,7 @@ def hilite(self, shebang: bool = True) -> str: lexer = get_lexer_by_name('text', **self.options) if not self.lang: # Use the guessed lexer's language instead - self.lang = lexer.aliases[0] + self.lang = lexer.aliases[0] # type: ignore[attr-defined] lang_str = f'{self.lang_prefix}{self.lang}' if isinstance(self.pygments_formatter, str): try: diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index da1a9be1e..40e01018e 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -159,7 +159,7 @@ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str] """ Return tuple: `(id, [list, of, classes], {configs})` """ id = '' classes = [] - configs = {} + configs: dict[str, Any] = {} for k, v in attrs: if k == 'id': id = v diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 30c081138..70f8fff03 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -38,7 +38,7 @@ class FootnoteExtension(Extension): """ Footnote Extension. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Setup configs. """ self.config = { @@ -290,7 +290,7 @@ def detectTabbed(self, blocks: list[str]) -> list[str]: break return fn_blocks - def detab(self, block: str) -> str: + def detab(self, block: str) -> str: # type: ignore[override] """ Remove one level of indent from a block. Preserve lazily indented blocks by only removing indent from indented lines. diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 64b84a5f4..5e107f40c 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -56,7 +56,7 @@ def __init__(self, md: Markdown, *args, **kwargs): self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags) self.span_and_blocks_tags = self.block_tags | self.span_tags - def reset(self): + def reset(self) -> None: """Reset this instance. Loses all unprocessed data.""" self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index cb703399b..f26ed78d1 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -78,7 +78,7 @@ def run(self, lines: list[str]) -> list[str]: else: lines.insert(0, line) break # no meta data - done - self.md.Meta = meta + self.md.Meta = meta # type: ignore[attr-defined] return lines diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 0ce7772a7..d51c3b061 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -179,7 +179,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class SmartyExtension(Extension): """ Add Smarty to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'smart_quotes': [True, 'Educate quotes'], 'smart_angled_quotes': [False, 'Educate angled quotes'], @@ -199,9 +199,8 @@ def _addPatterns( serie: str, priority: int, ): - for ind, pattern in enumerate(patterns): - pattern += (md,) - pattern = SubstituteTextPattern(*pattern) + for ind, pattern_args in enumerate(patterns): + pattern = SubstituteTextPattern(*pattern_args, md) name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) @@ -253,7 +252,7 @@ def educateQuotes(self, md: Markdown) -> None: ) self._addPatterns(md, patterns, 'quotes', 30) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown): configs = self.getConfigs() self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index a17d7241c..6d39ec834 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -80,7 +80,7 @@ def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> st def _html_sub(m: re.Match[str]) -> str: """ Substitute raw html with plain text. """ try: - raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + raw: str = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) # Strip out tags and/or entities - leaving text @@ -335,8 +335,8 @@ def run(self, doc: etree.Element) -> None: toc = self.md.serializer(div) for pp in self.md.postprocessors: toc = pp.run(toc) - self.md.toc_tokens = toc_tokens - self.md.toc = toc + self.md.toc_tokens = toc_tokens # type: ignore[attr-defined] + self.md.toc = toc # type: ignore[attr-defined] class TocExtension(Extension): diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 3f3cbe2dd..b3bcbfd8c 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -65,6 +65,7 @@ def __init__(self, pattern: str, config: dict[str, Any]): self.config = config def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: + a: etree.Element | str if m.group(1).strip(): base_url, end_url, html_class = self._getMeta() label = m.group(1).strip() diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 5155ef69d..3e3e9e8c5 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,7 +28,7 @@ import re import importlib.util import sys -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Any, Sequence if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -37,7 +37,7 @@ # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. # Users can still do `from html import parser` and get the default behavior. spec = importlib.util.find_spec('html.parser') -htmlparser = importlib.util.module_from_spec(spec) +htmlparser: Any = importlib.util.module_from_spec(spec) spec.loader.exec_module(htmlparser) sys.modules['htmlparser'] = htmlparser @@ -93,7 +93,7 @@ def __init__(self, md: Markdown, *args, **kwargs): super().__init__(*args, **kwargs) self.md = md - def reset(self): + def reset(self) -> None: """Reset this instance. Loses all unprocessed data.""" self.inraw = False self.intail = False diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index ef6f0fbc6..2fdd3dffd 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -47,10 +47,11 @@ try: # pragma: no cover from html import entities except ImportError: # pragma: no cover - import htmlentitydefs as entities + import htmlentitydefs as entities # type: ignore if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from . import treeprocessors def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]: @@ -191,7 +192,7 @@ class EmStrongItem(NamedTuple): # ----------------------------------------------------------------------------- -class Pattern: # pragma: no cover +class _BasePattern: """ Base class that inline patterns subclass. @@ -241,31 +242,20 @@ def getCompiledRegExp(self) -> re.Pattern: """ Return a compiled regular expression. """ return self.compiled_re - def handleMatch(self, m: re.Match[str]) -> etree.Element | str: - """Return a ElementTree element from the given match. - - Subclasses should override this method. - - Arguments: - m: A match object containing a match of the pattern. - - Returns: An ElementTree Element object. - - """ - pass # pragma: no cover - def type(self) -> str: """ Return class name, to define pattern type """ return self.__class__.__name__ def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ + assert self.md is not None try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inlineprocessor: treeprocessors.InlineProcessor = self.md.treeprocessors['inline'] + stash = inlineprocessor.stashed_nodes except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) if id in stash: value = stash.get(id) @@ -277,6 +267,27 @@ def get_stash(m): return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) +class LegacyPattern(_BasePattern): + def handleMatch(self, m: re.Match[str]) -> etree.Element | str: + """Return a ElementTree element from the given match. + + Subclasses should override this method. + + Arguments: + m: A match object containing a match of the pattern. + + Returns: An ElementTree Element object. + + """ + pass # pragma: no cover + + +if TYPE_CHECKING: # pragma: no cover + Pattern = _BasePattern +else: + Pattern = LegacyPattern + + class InlineProcessor(Pattern): """ Base class that inline processors subclass. @@ -508,13 +519,14 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inline_processor: treeprocessors.InlineProcessor = self.md.treeprocessors['inline'] + stash = inline_processor.stashed_nodes except KeyError: # pragma: no cover return text def get_stash(m: re.Match[str]) -> str: id = m.group(1) - value = stash.get(id) + value: etree.Element | None = stash.get(id) if value is not None: try: return self.md.serializer(value) @@ -526,7 +538,8 @@ def get_stash(m: re.Match[str]) -> str: def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). """ try: - RE = self.md.treeprocessors['unescape'].RE + unescape_processor: treeprocessors.UnescapeTreeprocessor = self.md.treeprocessors['unescape'] + RE = unescape_processor.RE except KeyError: # pragma: no cover return text diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90c..177ee4a7a 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -75,7 +75,8 @@ def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) + raw: str = self.md.htmlStash.rawHtmlBlocks[i] + html = self.stash_to_string(raw) if self.isblocklevel(html): replacements["

{}

".format( self.md.htmlStash.get_placeholder(i))] = html diff --git a/markdown/serializers.py b/markdown/serializers.py index 573b26483..67116b88a 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -45,8 +45,8 @@ from __future__ import annotations -from xml.etree.ElementTree import ProcessingInstruction -from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY +from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName +from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined] import re from typing import Callable, Literal, NoReturn diff --git a/markdown/test_tools.py b/markdown/test_tools.py index 895e44ec5..5f2cfbd8f 100644 --- a/markdown/test_tools.py +++ b/markdown/test_tools.py @@ -29,7 +29,7 @@ from . import markdown, Markdown, util try: - import tidylib + import tidylib # type: ignore except ImportError: tidylib = None diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index dc857204b..789ee6e5c 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -218,7 +218,7 @@ def linkText(text: str | None) -> None: text = data[strartIndex:index] linkText(text) - if not isString(node): # it's Element + if not isinstance(node, str): # it's Element for child in [node] + list(node): if child.tail: if child.tail.strip(): @@ -252,7 +252,7 @@ def linkText(text: str | None) -> None: def __applyPattern( self, - pattern: inlinepatterns.Pattern, + pattern: inlinepatterns.InlineProcessor | inlinepatterns.LegacyPattern, data: str, patternIndex: int, startIndex: int = 0 @@ -271,7 +271,12 @@ def __applyPattern( String with placeholders instead of `ElementTree` elements. """ - new_style = isinstance(pattern, inlinepatterns.InlineProcessor) + if isinstance(pattern, inlinepatterns.InlineProcessor): + new_style = True + new_pattern = pattern + else: + new_style = False + legacy_pattern = pattern for exclude in pattern.ANCESTOR_EXCLUDES: if exclude.lower() in self.ancestors: @@ -282,29 +287,29 @@ def __applyPattern( # Since `handleMatch` may reject our first match, # we iterate over the buffer looking for matches # until we can't find any more. - for match in pattern.getCompiledRegExp().finditer(data, startIndex): - node, start, end = pattern.handleMatch(match, data) + for match in new_pattern.getCompiledRegExp().finditer(data, startIndex): + node, start, end = new_pattern.handleMatch(match, data) if start is None or end is None: startIndex += match.end(0) match = None continue break else: # pragma: no cover - match = pattern.getCompiledRegExp().match(data[startIndex:]) + match = legacy_pattern.getCompiledRegExp().match(data[startIndex:]) leftData = data[:startIndex] if not match: return data, False, 0 if not new_style: # pragma: no cover - node = pattern.handleMatch(match) + node = legacy_pattern.handleMatch(match) start = match.start(0) end = match.end(0) if node is None: return data, True, end - if not isString(node): + if not isinstance(node, str): if not isinstance(node.text, util.AtomicString): # We need to process current node too for child in [node] + list(node): @@ -398,9 +403,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. child.tail = dumby.tail pos = list(currElement).index(child) + 1 tailResult.reverse() - for newChild in tailResult: - self.parent_map[newChild[0]] = currElement - currElement.insert(pos, newChild[0]) + for subChild in tailResult: + self.parent_map[subChild[0]] = currElement + currElement.insert(pos, subChild[0]) if len(child): self.parent_map[child] = currElement stack.append((child, self.ancestors[:])) diff --git a/markdown/util.py b/markdown/util.py index b4642023e..d6ba6d2ef 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -218,7 +218,7 @@ class HtmlStash: in the beginning and replace with place-holders. """ - def __init__(self): + def __init__(self) -> None: """ Create an `HtmlStash`. """ self.html_counter = 0 # for counting inline html segments self.rawHtmlBlocks: list[str | etree.Element] = [] @@ -309,7 +309,7 @@ class Registry(Generic[_T]): an item using that item's assigned "name". """ - def __init__(self): + def __init__(self) -> None: self._data: dict[str, _T] = {} self._priority: list[_PriorityItem] = [] self._is_sorted = False diff --git a/pyproject.toml b/pyproject.toml index 8c9e9bcfe..99b3c5d3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,5 +95,13 @@ legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension' [tool.setuptools] packages = ['markdown', 'markdown.extensions'] +[tool.setuptools.package-data] +"markdown" = ["py.typed"] + [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} + +[tool.mypy] +strict_optional = false +warn_no_return = false +disable_error_code = 'assignment, var-annotated' diff --git a/tox.ini b/tox.ini index d071054ea..7e528d478 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, mypy, flake8, checkspelling, pep517check, checklinks isolated_build = True [testenv] @@ -19,6 +19,15 @@ deps = pytidylib pygments=={env:PYGMENTS_VERSION} +[testenv:mypy] +deps = + mypy + types-PyYAML + types-Pygments +allowlist_externals = mypy +commands = mypy {toxinidir}/markdown +skip_install = true + [testenv:flake8] deps = flake8 allowlist_externals = flake8 From dc41e0d851c083dfc1f77a093de19528a114573d Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 18:27:47 +0100 Subject: [PATCH 2/8] This branch has no coverage --- markdown/treeprocessors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 789ee6e5c..228760ac6 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -275,7 +275,7 @@ def __applyPattern( new_style = True new_pattern = pattern else: - new_style = False + new_style = False # pragma: no cover legacy_pattern = pattern for exclude in pattern.ANCESTOR_EXCLUDES: From ab34074aa77668fcf303c4f0a6dfe61a27b91ec7 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 18:29:15 +0100 Subject: [PATCH 3/8] Fixup --- markdown/treeprocessors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 228760ac6..39ecdb858 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -274,8 +274,8 @@ def __applyPattern( if isinstance(pattern, inlinepatterns.InlineProcessor): new_style = True new_pattern = pattern - else: - new_style = False # pragma: no cover + else: # pragma: no cover + new_style = False legacy_pattern = pattern for exclude in pattern.ANCESTOR_EXCLUDES: From 944d89b273d4053130200c213d66f6bd9e0766ce Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 18:33:48 +0100 Subject: [PATCH 4/8] Further reduce changes --- markdown/inlinepatterns.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 1a23d283f..b9ad61392 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -48,7 +48,6 @@ if TYPE_CHECKING: # pragma: no cover from markdown import Markdown - from . import treeprocessors def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]: @@ -247,8 +246,7 @@ def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ assert self.md is not None try: - inlineprocessor: treeprocessors.InlineProcessor = self.md.treeprocessors['inline'] - stash = inlineprocessor.stashed_nodes + stash = self.md.treeprocessors['inline'].stashed_nodes # type: ignore[attr-defined] except KeyError: # pragma: no cover return text @@ -516,8 +514,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: - inline_processor: treeprocessors.InlineProcessor = self.md.treeprocessors['inline'] - stash = inline_processor.stashed_nodes + stash = self.md.treeprocessors['inline'].stashed_nodes # type: ignore[attr-defined] except KeyError: # pragma: no cover return text @@ -535,8 +532,7 @@ def get_stash(m: re.Match[str]) -> str: def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). """ try: - unescape_processor: treeprocessors.UnescapeTreeprocessor = self.md.treeprocessors['unescape'] - RE = unescape_processor.RE + RE = self.md.treeprocessors['unescape'].RE # type: ignore[attr-defined] except KeyError: # pragma: no cover return text From 7dc2f9d6763912aca4c90e33dfb4304c341ed77a Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 19:55:49 +0100 Subject: [PATCH 5/8] Remove another check --- markdown/inlinepatterns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index b9ad61392..dfd91a9b4 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -244,7 +244,6 @@ def type(self) -> str: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ - assert self.md is not None try: stash = self.md.treeprocessors['inline'].stashed_nodes # type: ignore[attr-defined] except KeyError: # pragma: no cover From 0dca660d51b9852c3f8673f2bb92afae776542b9 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Thu, 2 Nov 2023 20:06:24 +0100 Subject: [PATCH 6/8] Not adding py.typed yet --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 99b3c5d3b..feb7a2fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,9 +95,6 @@ legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension' [tool.setuptools] packages = ['markdown', 'markdown.extensions'] -[tool.setuptools.package-data] -"markdown" = ["py.typed"] - [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} From 75cb5d78c8904d190129394f45853153c817e03a Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 3 Nov 2023 16:13:41 +0100 Subject: [PATCH 7/8] Remove all `-> None` annotations --- markdown/core.py | 24 ++++++++++++++++-------- markdown/extensions/footnotes.py | 9 ++++++--- markdown/extensions/md_in_html.py | 10 +++++++--- markdown/extensions/smarty.py | 7 +++++-- markdown/htmlparser.py | 11 +++++++---- markdown/util.py | 17 +++++++++++------ 6 files changed, 52 insertions(+), 26 deletions(-) diff --git a/markdown/core.py b/markdown/core.py index 8c8305b2b..a51082a8a 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -85,7 +85,15 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ - def __init__(self, **kwargs) -> None: + tab_length: int + ESCAPED_CHARS: list[str] + block_level_elements: list[str] + registeredExtensions: list[Extension] + stripTopLevelTags: bool + references: dict[str, tuple[str, str]] + htmlStash: util.HtmlStash + + def __init__(self, **kwargs): """ Creates a new Markdown instance. @@ -106,23 +114,23 @@ def __init__(self, **kwargs) -> None: """ - self.tab_length: int = kwargs.get('tab_length', 4) + self.tab_length = kwargs.get('tab_length', 4) - self.ESCAPED_CHARS: list[str] = [ + self.ESCAPED_CHARS = [ '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!' ] """ List of characters which get the backslash escape treatment. """ - self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy() + self.block_level_elements = BLOCK_LEVEL_ELEMENTS.copy() - self.registeredExtensions: list[Extension] = [] + self.registeredExtensions = [] self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore. - self.stripTopLevelTags: bool = True + self.stripTopLevelTags = True self.build_parser() - self.references: dict[str, tuple[str, str]] = {} - self.htmlStash: util.HtmlStash = util.HtmlStash() + self.references = {} + self.htmlStash = util.HtmlStash() self.registerExtensions(extensions=kwargs.get('extensions', []), configs=kwargs.get('extension_configs', {})) self.set_output_format(kwargs.get('output_format', 'xhtml')) diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 70f8fff03..c2d9a7034 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -38,7 +38,10 @@ class FootnoteExtension(Extension): """ Footnote Extension. """ - def __init__(self, **kwargs) -> None: + found_refs: dict[str, int] + used_refs: set[str] + + def __init__(self, **kwargs): """ Setup configs. """ self.config = { @@ -68,8 +71,8 @@ def __init__(self, **kwargs) -> None: # In multiple invocations, emit links that don't get tangled. self.unique_prefix = 0 - self.found_refs: dict[str, int] = {} - self.used_refs: set[str] = set() + self.found_refs = {} + self.used_refs = set() self.reset() diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 5e107f40c..cb14b4818 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -40,6 +40,10 @@ class HTMLExtractorExtra(HTMLExtractor): Markdown. """ + mdstack: list[str] = [] # When markdown=1, stack contains a list of tags + treebuilder: etree.TreeBuilder + mdstate: list[Literal['block', 'span', 'off', None]] + def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. self.block_level_tags = set(md.block_level_elements.copy()) @@ -56,11 +60,11 @@ def __init__(self, md: Markdown, *args, **kwargs): self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags) self.span_and_blocks_tags = self.block_tags | self.span_tags - def reset(self) -> None: + def reset(self): """Reset this instance. Loses all unprocessed data.""" - self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags + self.mdstack = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() - self.mdstate: list[Literal['block', 'span', 'off', None]] = [] + self.mdstate = [] super().reset() def close(self): diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index d51c3b061..1e001b33f 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -179,7 +179,10 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class SmartyExtension(Extension): """ Add Smarty to Markdown. """ - def __init__(self, **kwargs) -> None: + + substitutions: dict[str, str] + + def __init__(self, **kwargs): self.config = { 'smart_quotes': [True, 'Educate quotes'], 'smart_angled_quotes': [False, 'Educate angled quotes'], @@ -189,7 +192,7 @@ def __init__(self, **kwargs) -> None: } """ Default configuration options. """ super().__init__(**kwargs) - self.substitutions: dict[str, str] = dict(substitutions) + self.substitutions = dict(substitutions) self.substitutions.update(self.getConfig('substitutions', default={})) def _addPatterns( diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 3e3e9e8c5..04e9911c1 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -80,6 +80,9 @@ class HTMLExtractor(htmlparser.HTMLParser): is stored in `cleandoc` as a list of strings. """ + stack: list[str] + cleandoc: list[str] + def __init__(self, md: Markdown, *args, **kwargs): if 'convert_charrefs' not in kwargs: kwargs['convert_charrefs'] = False @@ -93,13 +96,13 @@ def __init__(self, md: Markdown, *args, **kwargs): super().__init__(*args, **kwargs) self.md = md - def reset(self) -> None: + def reset(self): """Reset this instance. Loses all unprocessed data.""" self.inraw = False self.intail = False - self.stack: list[str] = [] # When `inraw==True`, stack contains a list of tags - self._cache: list[str] = [] - self.cleandoc: list[str] = [] + self.stack = [] # When `inraw==True`, stack contains a list of tags + self._cache = [] + self.cleandoc = [] self.lineno_start_cache = [0] super().reset() diff --git a/markdown/util.py b/markdown/util.py index d6ba6d2ef..b7ad9b19a 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -218,12 +218,17 @@ class HtmlStash: in the beginning and replace with place-holders. """ - def __init__(self) -> None: + html_counter: int + rawHtmlBlocks: list[str | etree.Element] + tag_counter: int + tag_data: list[TagData] + + def __init__(self): """ Create an `HtmlStash`. """ self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks: list[str | etree.Element] = [] + self.rawHtmlBlocks = [] self.tag_counter = 0 - self.tag_data: list[TagData] = [] # list of dictionaries in the order tags appear + self.tag_data = [] # list of dictionaries in the order tags appear def store(self, html: str | etree.Element) -> str: """ @@ -309,9 +314,9 @@ class Registry(Generic[_T]): an item using that item's assigned "name". """ - def __init__(self) -> None: - self._data: dict[str, _T] = {} - self._priority: list[_PriorityItem] = [] + def __init__(self): + self._data = {} + self._priority = [] self._is_sorted = False def __contains__(self, item: str | _T) -> bool: From 389ccebc84fa2b7e0de9c2feda750568511f0eda Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 3 Nov 2023 16:26:21 +0100 Subject: [PATCH 8/8] Ignore some problems away --- markdown/core.py | 4 ++-- markdown/postprocessors.py | 3 +-- markdown/treeprocessors.py | 17 +++++++---------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/markdown/core.py b/markdown/core.py index a51082a8a..47d3ba3b4 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -453,8 +453,8 @@ def convertFile( # Don't close here. User may want to write more. else: # Encode manually and write bytes to stdout. - html_bytes = html.encode(encoding, "xmlcharrefreplace") - sys.stdout.buffer.write(html_bytes) + html = html.encode(encoding, "xmlcharrefreplace") + sys.stdout.buffer.write(html) # type: ignore return self diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 177ee4a7a..a620ab098 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -75,8 +75,7 @@ def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - raw: str = self.md.htmlStash.rawHtmlBlocks[i] - html = self.stash_to_string(raw) + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) # type: ignore[arg-type] if self.isblocklevel(html): replacements["

{}

".format( self.md.htmlStash.get_placeholder(i))] = html diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 39ecdb858..52f2409e8 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -271,38 +271,35 @@ def __applyPattern( String with placeholders instead of `ElementTree` elements. """ - if isinstance(pattern, inlinepatterns.InlineProcessor): - new_style = True - new_pattern = pattern - else: # pragma: no cover - new_style = False - legacy_pattern = pattern + new_style = isinstance(pattern, inlinepatterns.InlineProcessor) for exclude in pattern.ANCESTOR_EXCLUDES: if exclude.lower() in self.ancestors: return data, False, 0 + start: int | None + end: int | None if new_style: match = None # Since `handleMatch` may reject our first match, # we iterate over the buffer looking for matches # until we can't find any more. - for match in new_pattern.getCompiledRegExp().finditer(data, startIndex): - node, start, end = new_pattern.handleMatch(match, data) + for match in pattern.getCompiledRegExp().finditer(data, startIndex): + node, start, end = pattern.handleMatch(match, data) # type: ignore if start is None or end is None: startIndex += match.end(0) match = None continue break else: # pragma: no cover - match = legacy_pattern.getCompiledRegExp().match(data[startIndex:]) + match = pattern.getCompiledRegExp().match(data[startIndex:]) leftData = data[:startIndex] if not match: return data, False, 0 if not new_style: # pragma: no cover - node = legacy_pattern.handleMatch(match) + node = pattern.handleMatch(match) # type: ignore start = match.start(0) end = match.end(0)