Skip to content

Commit bc45547

Browse files
committed
Add tool for linting Doc/data/refcounts.dat
1 parent 3afb639 commit bc45547

File tree

3 files changed

+259
-0
lines changed

3 files changed

+259
-0
lines changed

Tools/refcounts/.ruff.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Ruff settings used when linting Tools/refcounts.
target-version = "py312"
line-length = 80
fix = true

[lint]
# Start from every available rule, then opt out of a handful below.
select = ["ALL"]
ignore = [
    "D",        # docstrings
    "I001",     # split imports
    "Q00",      # prefer double quotes over single quotes
    "T201",     # print() found
    "PLR2004",  # magic values
]

Tools/refcounts/lint.py

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
"""Lint Doc/data/refcounts.dat."""
2+
3+
from __future__ import annotations
4+
5+
import itertools
6+
import re
7+
import tomllib
8+
from argparse import ArgumentParser
9+
from dataclasses import dataclass, field
10+
from enum import auto as _auto, Enum
11+
from pathlib import Path
12+
from typing import TYPE_CHECKING, LiteralString, NamedTuple
13+
14+
if TYPE_CHECKING:
15+
from collections.abc import Callable, Iterable, Mapping
16+
17+
# Parameter name that check() accepts even though it is not an identifier.
C_ELLIPSIS: LiteralString = '...'

# Matcher for comment lines of the form "# TODO: <name>"; group 1 is the name.
# The match type is parametrized so that the annotation passes strict mypy.
MATCH_TODO: Callable[[str], re.Match[str] | None]
MATCH_TODO = re.compile(r'^#\s*TODO:\s*(\w+)$').match
21+
22+
# Every accepted spelling of an object pointer type: an optional ``const``
# qualifier, one of the object structs, and a pointer suffix written both
# directly after the struct name and separated from it by one space
# (e.g. ``PyObject*`` and ``PyObject *``).  check() uses membership in this
# set to decide whether a parameter needs reference count information.
OBJECT_TYPES: frozenset[str] = frozenset(
    f'{qualifier}{object_type}{space}{suffix}'
    for qualifier, object_type, suffix, space in itertools.product(
        ('const ', ''),
        (
            'PyObject',
            'PyLongObject', 'PyTypeObject',
            'PyCodeObject', 'PyFrameObject',
            'PyModuleObject', 'PyVarObject',
        ),
        ('*', '**', '* const *', '* const*'),
        ('', ' '),
    )
)
39+
40+
# Stable ABI names that check_structure() never reports as missing.
IGNORE_LIST: frozenset[str] = frozenset({
    # part of the stable ABI but should not be used at all
    'PyUnicode_GetSize',
    # part of the stable ABI but completely removed
    '_PyState_AddModule',
})
46+
47+
def flno_(lineno: int) -> str:
    """Return *lineno* right-aligned in five columns plus a trailing space.

    The number stands alone at the start of each report line so that it can
    be copied from the terminal and pasted into an editor's "go to line"
    prompt (Ctrl+G).
    """
    return format(lineno, '>5') + ' '
51+
52+
class RefType(Enum):
    """Classification of the reference count column of an entry."""

    UNKNOWN = 1  # token that parse_line() does not recognise
    UNUSED = 2  # empty column
    DECREF = 3  # '-1'
    BORROW = 4  # '0'
    INCREF = 5  # '+1' or '1'
    STEALS = 6  # '$'
    NULL = 7  # for return values only
60+
61+
class LineInfo(NamedTuple):
    """Fields of one parsed ``func:ctype:name:reftype:comment`` entry."""

    # Name of the function the entry belongs to.
    func: str
    # C type column, or None when the column is empty.
    ctype: str | None
    # Parameter name column, or None when the column is empty.
    name: str | None
    # Classified reference count column.
    reftype: RefType | None
    # Trailing free-form text, stripped of surrounding whitespace.
    comment: str
67+
68+
@dataclass(slots=True)
69+
class Return:
70+
ctype: str | None
71+
reftype: RefType | None
72+
comment: str
73+
74+
@dataclass(slots=True)
75+
class Param:
76+
name: str
77+
lineno: int
78+
79+
ctype: str | None
80+
reftype: RefType | None
81+
comment: str
82+
83+
@dataclass(slots=True)
84+
class Signature:
85+
name: str
86+
lineno: int
87+
rparam: Return
88+
params: dict[str, Param] = field(default_factory=dict)
89+
90+
class FileView(NamedTuple):
    """Result of parsing a whole file."""

    # Parsed functions keyed by name, in order of first appearance.
    signatures: Mapping[str, Signature]
    # Names collected from "# TODO: <name>" comment lines.
    incomplete: frozenset[str]
93+
94+
def parse_line(line: str) -> LineInfo | None:
95+
parts = line.split(':', maxsplit=4)
96+
if len(parts) != 5:
97+
return None
98+
99+
func, raw_ctype, raw_name, raw_reftype, comment = parts
100+
if not func:
101+
return None
102+
103+
ctype = raw_ctype.strip() or None
104+
name = raw_name.strip() or None
105+
106+
raw_reftype = raw_reftype.strip()
107+
if raw_reftype == '-1':
108+
reftype = RefType.DECREF
109+
elif raw_reftype == '0':
110+
reftype = RefType.BORROW
111+
elif raw_reftype in {'+1', '1'}:
112+
reftype = RefType.INCREF
113+
elif raw_reftype == '$':
114+
reftype = RefType.STEALS
115+
elif raw_reftype.lower() == 'null':
116+
reftype = RefType.NULL
117+
elif not raw_reftype:
118+
reftype = RefType.UNUSED
119+
else:
120+
reftype = RefType.UNKNOWN
121+
122+
comment = comment.strip()
123+
return LineInfo(func, ctype, name, reftype, comment)
124+
125+
def parse(lines: Iterable[str]) -> FileView:
    """Build a FileView from the lines of a file.

    Blank lines are skipped.  Lines starting with ``#`` are comments; those
    matching ``# TODO: <name>`` add ``<name>`` to the incomplete set.  The
    first entry seen for a function describes its return value and every
    later entry for the same function describes one parameter.

    A diagnostic is printed for lines that cannot be parsed, for a return
    value entry that carries a name, and for parameter entries whose name
    is missing or duplicated.  Unparsable lines and such parameter entries
    are not recorded.
    """
    table: dict[str, Signature] = {}
    pending: set[str] = set()

    for lineno, raw in enumerate(lines, 1):
        text = raw.strip()
        if not text:
            continue

        if text.startswith('#'):
            todo = MATCH_TODO(text)
            if todo is not None:
                pending.add(todo.group(1))
            continue

        entry = parse_line(text)
        if entry is None:
            print(f"{flno_(lineno)} cannot parse: {text}")
            continue

        known = table.get(entry.func)
        if known is None:
            # First entry of a function: its return value.
            if entry.name is not None:
                print(f'{flno_(lineno)} named return value in {text!r}')
            returned = Return(entry.ctype, entry.reftype, entry.comment)
            table[entry.func] = Signature(entry.func, lineno, returned)
        elif entry.name is None:
            print(f'{flno_(lineno)} missing parameter name in {text!r}')
        elif entry.name in known.params:
            print(f'{flno_(lineno)} duplicated parameter name in {text!r}')
        else:
            known.params[entry.name] = Param(
                entry.name, lineno, entry.ctype, entry.reftype, entry.comment,
            )

    return FileView(table, frozenset(pending))
162+
163+
class Warnings:
    """Print warnings and keep count of how many were printed."""

    def __init__(self) -> None:
        self.count = 0

    def _emit(self, lineno: int, label: str, message: str) -> None:
        # Shared layout: line number, label padded to 50 columns, message.
        self.count += 1
        print(f'{flno_(lineno)} {label:50} {message}')

    def block(self, sig: Signature, message: str) -> None:
        """Print *message* for the function *sig* as a whole."""
        self._emit(sig.lineno, sig.name, message)

    def param(self, sig: Signature, param: Param, message: str) -> None:
        """Print *message* for the parameter *param* of *sig*."""
        self._emit(param.lineno, f'{sig.name}[{param.name}]', message)
175+
176+
def check(view: FileView) -> None:
    """Print a warning for each suspicious entry of *view*.

    For every function: a missing return type and an unrecognised return
    reference count are reported.  For every parameter: an object-typed
    parameter with no reference count, a parameter of any other type that
    has one, and a name that is neither ``...`` nor an identifier.

    A blank line follows the warnings, then the number of issues if there
    are any, then a notice if the functions are not in sorted order.
    """
    warnings = Warnings()

    for signature in view.signatures.values():
        # Return value.
        returned = signature.rparam
        if not returned.ctype:
            warnings.block(signature, "missing return value type")
        if returned.reftype is RefType.UNKNOWN:
            warnings.block(signature, "unknown return value type")

        # Parameters.
        for pname, parameter in signature.params.items():
            is_object = parameter.ctype in OBJECT_TYPES
            is_unused = parameter.reftype is RefType.UNUSED
            if is_object and is_unused:
                warnings.param(
                    signature, parameter,
                    "missing reference count management",
                )
            if not is_object and not is_unused:
                warnings.param(
                    signature, parameter,
                    "unused reference count management",
                )
            if not (pname == C_ELLIPSIS or pname.isidentifier()):
                # Python accepts the same identifiers as in C
                warnings.param(signature, parameter, "invalid parameter name")

    print()
    if warnings.count:
        print(f"Found {warnings.count} issues")

    ordered = list(view.signatures)
    if ordered != sorted(ordered):
        print("Entries are not sorted")
203+
204+
def check_structure(view: FileView, stable_abi_file: str) -> None:
    """Print the stable ABI functions that have no entry in *view*.

    *stable_abi_file* is the path of a TOML file whose ``function`` table
    is keyed by function name.  A function counts as present when it has a
    signature in *view*, is marked as TODO (``view.incomplete``) or is
    listed in IGNORE_LIST.

    Raises KeyError if the file has no ``function`` table, and whatever
    opening the file or tomllib raises for an unreadable or invalid file.
    """
    # TOML is UTF-8 by specification and tomllib.load() takes a binary
    # stream, so decoding does not depend on the locale encoding.
    with Path(stable_abi_file).open('rb') as stream:
        stable_abi = tomllib.load(stream)
    expect = stable_abi['function'].keys()
    # check if there are missing entries (those marked as "TODO" are ignored)
    actual = IGNORE_LIST | view.incomplete | view.signatures.keys()
    if missing := (expect - actual):
        print('[!] missing stable ABI entries:')
        for name in sorted(missing):
            print(name)
214+
215+
def _create_parser() -> ArgumentParser:
216+
parser = ArgumentParser(prog='lint.py')
217+
parser.add_argument('file', help="the file to check")
218+
parser.add_argument('--stable-abi', help="the stable ABI TOML file to use")
219+
return parser
220+
221+
def main() -> None:
    """Run the linter from the command line.

    Parses the file named on the command line, prints the parsing
    diagnostics and the check results under their own banners and, when
    ``--stable-abi`` is given, the missing stable ABI entries as well.
    """
    parser = _create_parser()
    args = parser.parse_args()
    # Decode explicitly so that the result does not depend on the locale
    # encoding of the machine running the tool.
    lines = Path(args.file).read_text(encoding='utf-8').splitlines()
    print(" PARSING ".center(80, '-'))
    view = parse(lines)
    print(" CHECKING ".center(80, '-'))
    check(view)
    if args.stable_abi:
        print(" CHECKING STABLE ABI ".center(80, '-'))
        check_structure(view, args.stable_abi)


if __name__ == "__main__":
    main()

Tools/refcounts/mypy.ini

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# mypy settings for Tools/refcounts/lint.py.
[mypy]
files = Tools/refcounts/lint.py
pretty = True
show_traceback = True
python_version = 3.12

strict = True
warn_unreachable = True
# NOTE(review): confirm that mypy accepts "all" here; the option is
# documented as taking a comma-separated list of error codes.
enable_error_code = all
# Set explicitly next to "strict"; presumably to opt out of this one check.
warn_return_any = False

0 commit comments

Comments
 (0)