From 5ed9db4b762dcc10d5f889589a1804566b205643 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Tue, 5 Jul 2022 15:58:45 +0100 Subject: [PATCH 1/5] Export API Ids. --- .gitignore | 1 + package.json | 5 +- ...{crowdin-convert.py => crowdin_convert.py} | 0 scripts/export_api_ids.py | 87 +++++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) rename scripts/{crowdin-convert.py => crowdin_convert.py} (100%) create mode 100644 scripts/export_api_ids.py diff --git a/.gitignore b/.gitignore index 3de99a8..c5fc7b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /typeshed.json /typeshed.*.json +/api-ids.json /node_modules/ *.rst /crowdin/ diff --git a/package.json b/package.json index 6f5bafe..a4f636f 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,9 @@ "description": "Temporary home for micro:bit MicroPython stubs.", "scripts": { "test": "pyright -p test-pyrightconfig.json", - "i18n:typeshed-to-crowdin": "python3 scripts/crowdin-convert.py typeshed-to-crowdin", - "i18n:crowdin-to-typeshed": "python3 scripts/crowdin-convert.py crowdin-to-typeshed" + "i18n:typeshed-to-crowdin": "python3 scripts/crowdin_convert.py typeshed-to-crowdin", + "i18n:crowdin-to-typeshed": "python3 scripts/crowdin_convert.py crowdin-to-typeshed", + "export-api-ids": "python3 scripts/export_api_ids.py" }, "repository": { "type": "git", diff --git a/scripts/crowdin-convert.py b/scripts/crowdin_convert.py similarity index 100% rename from scripts/crowdin-convert.py rename to scripts/crowdin_convert.py diff --git a/scripts/export_api_ids.py b/scripts/export_api_ids.py new file mode 100644 index 0000000..d778620 --- /dev/null +++ b/scripts/export_api_ids.py @@ -0,0 +1,87 @@ +""" + Creates api.en.json file in Crowdin's format + containing translatable parts of the API + produced from the type stubs files. + Inclues first line of docstring (summary), + parameter names and parameter docs. +""" + +import ast +import os +import json +from crowdin_convert import ( + get_stub_files, + DIR, + TypeshedFile, + get_source, + DocStringVisitor, +) + +modules = [ + "gc", + "log", + "machine", + "math", + "microbit", + "micropython", + "music", + "neopixel", + "os", + "radio", + "random", + "speech", + "struct", + "sys", + "time", +] + + +def export_api_ids(): + data_list = [] + files_to_process = get_stub_files() + for ts_file in files_to_process: + if not ts_file.python_file: + continue + data_list = data_list + get_api_ids(ts_file) + # Remove overloads. + data_list = list(set(data_list)) + data_list.sort() + data = {"apiIds": data_list} + save_api_ids(data) + pass + + +def save_api_ids(data): + with open(os.path.join(DIR, "../", "api-ids.json"), "w") as file: + file.write(json.dumps(data, indent=2)) + + +def checkModuleRequired(module_name): + if module_name in modules: + return True + if "microbit" in module_name: + return True + return False + + +def get_api_ids(ts_file: TypeshedFile): + source = get_source(ts_file.file_path) + tree = ast.parse(source) + + class DocStringCollector(DocStringVisitor): + def __init__(self): + super().__init__(ts_file.module_name) + self.data: list[str] = [] + + def handle_docstring(self, node: ast.AST, name: str) -> None: + key = ".".join([*self.key, name]) + if checkModuleRequired(ts_file.module_name): + self.data.append(key) + + collector = DocStringCollector() + collector.visit(tree) + return collector.data + + +if __name__ == "__main__": + export_api_ids() From b042f73b284d528f277c5262004de61c5a129285 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Tue, 5 Jul 2022 16:01:05 +0100 Subject: [PATCH 2/5] Update docstring. --- scripts/export_api_ids.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/export_api_ids.py b/scripts/export_api_ids.py index d778620..c2e545b 100644 --- a/scripts/export_api_ids.py +++ b/scripts/export_api_ids.py @@ -1,9 +1,8 @@ """ - Creates api.en.json file in Crowdin's format - containing translatable parts of the API - produced from the type stubs files. - Inclues first line of docstring (summary), - parameter names and parameter docs. + Creates api-ids.json file which + contains all API calls that correspond + to documentation shown in the + Python Editor sidebar. """ import ast From 1dbddf1229d2b028f53a4280062ca6da3840e7c6 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Tue, 5 Jul 2022 16:55:50 +0100 Subject: [PATCH 3/5] Add common functions, types, vars into single file --- scripts/common.py | 117 +++++++++++++++++++++++++++++++++++++ scripts/crowdin_convert.py | 108 +--------------------------------- scripts/export_api_ids.py | 17 +++--- 3 files changed, 128 insertions(+), 114 deletions(-) create mode 100644 scripts/common.py diff --git a/scripts/common.py b/scripts/common.py new file mode 100644 index 0000000..194184a --- /dev/null +++ b/scripts/common.py @@ -0,0 +1,117 @@ +""" + Functions, types and variables + shared with various scripts. + Including: + crowdin_convert.py + export_api_ids.py +""" + +from dataclasses import dataclass +import os +import ast +from typing import Any +from typing import Optional + +DIR = os.path.dirname(__file__) + + +@dataclass +class TypeshedFile: + file_path: str + module_name: str + python_file: bool + + +def get_source(file_path): + with open(file_path, "r", encoding="utf-8") as file: + return file.read() + + +def module_name_for_path(file_path: str): + """Hacky determination of the module name.""" + name = os.path.basename(file_path) + in_microbit_package = os.path.basename(os.path.dirname(file_path)) == "microbit" + if in_microbit_package: + if name == "__init__.pyi": + return "microbit" + return ".".join(["microbit", os.path.splitext(name)[0]]) + return os.path.splitext(name)[0] + + +def get_stub_files() -> list[TypeshedFile]: + top = os.path.join(DIR, "..", "lang/en/typeshed/stdlib") + files_to_process: list[TypeshedFile] = [] + for root, dirs, files in os.walk(top): + for name in files: + file_path = os.path.join(root, name) + # Skip audio stubs file that imports from microbit audio + # (so we don't include its docstring) + if ( + os.path.basename(os.path.dirname(file_path)) != "microbit" + and name == "audio.pyi" + ): + continue + if name.endswith(".pyi"): + files_to_process.append( + TypeshedFile( + file_path=file_path, + module_name=module_name_for_path(file_path), + python_file=True, + ) + ) + else: + files_to_process.append( + TypeshedFile( + file_path=file_path, + module_name="", + python_file=False, + ) + ) + return sorted(files_to_process, key=lambda x: x.file_path) + + +class DocStringVisitor(ast.NodeVisitor): + def __init__(self, module_name): + self.module_name = module_name + self.key = [] + self.used_keys = set() + self.preceding: Optional[str] = None + + def visit_Module(self, node: ast.Module) -> Any: + name = self.module_name + self.handle_docstring(node, name) + + self.key.append(name) + self.generic_visit(node) + self.key.pop() + + def visit_ClassDef(self, node): + name = node.name + self.handle_docstring(node, name) + + self.key.append(name) + self.generic_visit(node) + self.key.pop() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: + self.preceding = None + self.handle_docstring(node, node.name) + + def visit_AnnAssign(self, node: ast.AnnAssign) -> Any: + self.preceding = node.target.id # type: ignore + + def visit_Assign(self, node: ast.Assign) -> Any: + if len(node.targets) != 1: + raise AssertionError() + self.preceding = node.targets[0].id # type: ignore + + def visit_Expr(self, node: ast.Expr) -> Any: + if self.preceding: + self.handle_docstring(node, self.preceding) + + def generic_visit(self, node: ast.AST) -> Any: + self.preceding = None + return super().generic_visit(node) + + def handle_docstring(self, node: ast.AST, name: str) -> None: + raise NotImplementedError() diff --git a/scripts/crowdin_convert.py b/scripts/crowdin_convert.py index 6724254..dd32736 100644 --- a/scripts/crowdin_convert.py +++ b/scripts/crowdin_convert.py @@ -7,19 +7,14 @@ """ import ast -from dataclasses import dataclass import os import json import re import sys - -from typing import Any - -from typing import Optional +from common import TypeshedFile, get_stub_files, DIR, get_source, DocStringVisitor NODE_TYPES_WITH_DOCSTRINGS = (ast.FunctionDef, ast.Module, ast.ClassDef) -DIR = os.path.dirname(__file__) EN_JSON_PATH = os.path.join(DIR, "../crowdin/api.en.json") TRANSLATED_JSON_DIR = os.path.join(DIR, "../crowdin/translated") @@ -35,55 +30,6 @@ def typeshed_to_crowdin(): save_docstrings_as_json(data) -@dataclass -class TypeshedFile: - file_path: str - module_name: str - python_file: bool - - -def get_stub_files() -> list[TypeshedFile]: - top = os.path.join(DIR, "..", "lang/en/typeshed/stdlib") - files_to_process: list[TypeshedFile] = [] - for root, dirs, files in os.walk(top): - for name in files: - file_path = os.path.join(root, name) - # Skip audio stubs file that imports from microbit audio (so we don't include its docstring) - if ( - os.path.basename(os.path.dirname(file_path)) != "microbit" - and name == "audio.pyi" - ): - continue - if name.endswith(".pyi"): - files_to_process.append( - TypeshedFile( - file_path=file_path, - module_name=module_name_for_path(file_path), - python_file=True, - ) - ) - else: - files_to_process.append( - TypeshedFile( - file_path=file_path, - module_name="", - python_file=False, - ) - ) - return sorted(files_to_process, key=lambda x: x.file_path) - - -def module_name_for_path(file_path: str): - """Hacky determination of the module name used as a translation key.""" - name = os.path.basename(file_path) - in_microbit_package = os.path.basename(os.path.dirname(file_path)) == "microbit" - if in_microbit_package: - if name == "__init__.pyi": - return "microbit" - return ".".join(["microbit", os.path.splitext(name)[0]]) - return os.path.splitext(name)[0] - - # Translation key to dict with message/description fields. TranslationJSON = dict[str, dict[str, str]] @@ -112,11 +58,6 @@ def handle_docstring(self, node: ast.AST, name: str) -> None: return collector.data -def get_source(file_path): - with open(file_path, "r", encoding="utf-8") as file: - return file.read() - - def pretty_api_name(name): return name.replace("_", " ").strip().lower() @@ -458,53 +399,6 @@ def maybe_dir(maybe_path): os.mkdir(maybe_path) -class DocStringVisitor(ast.NodeVisitor): - def __init__(self, module_name): - self.module_name = module_name - self.key = [] - self.used_keys = set() - self.preceding: Optional[str] = None - - def visit_Module(self, node: ast.Module) -> Any: - name = self.module_name - self.handle_docstring(node, name) - - self.key.append(name) - self.generic_visit(node) - self.key.pop() - - def visit_ClassDef(self, node): - name = node.name - self.handle_docstring(node, name) - - self.key.append(name) - self.generic_visit(node) - self.key.pop() - - def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: - self.preceding = None - self.handle_docstring(node, node.name) - - def visit_AnnAssign(self, node: ast.AnnAssign) -> Any: - self.preceding = node.target.id # type: ignore - - def visit_Assign(self, node: ast.Assign) -> Any: - if len(node.targets) != 1: - raise AssertionError() - self.preceding = node.targets[0].id # type: ignore - - def visit_Expr(self, node: ast.Expr) -> Any: - if self.preceding: - self.handle_docstring(node, self.preceding) - - def generic_visit(self, node: ast.AST) -> Any: - self.preceding = None - return super().generic_visit(node) - - def handle_docstring(self, node: ast.AST, name: str) -> None: - raise NotImplementedError() - - if __name__ == "__main__": operation = sys.argv[1] if operation == "typeshed-to-crowdin": diff --git a/scripts/export_api_ids.py b/scripts/export_api_ids.py index c2e545b..6dea913 100644 --- a/scripts/export_api_ids.py +++ b/scripts/export_api_ids.py @@ -8,7 +8,7 @@ import ast import os import json -from crowdin_convert import ( +from common import ( get_stub_files, DIR, TypeshedFile, @@ -39,11 +39,8 @@ def export_api_ids(): data_list = [] files_to_process = get_stub_files() for ts_file in files_to_process: - if not ts_file.python_file: - continue - data_list = data_list + get_api_ids(ts_file) - # Remove overloads. - data_list = list(set(data_list)) + if ts_file.python_file: + data_list = data_list + get_api_ids(ts_file) data_list.sort() data = {"apiIds": data_list} save_api_ids(data) @@ -73,7 +70,13 @@ def __init__(self): self.data: list[str] = [] def handle_docstring(self, node: ast.AST, name: str) -> None: - key = ".".join([*self.key, name]) + key_root = ".".join([*self.key, name]) + key = key_root + suffix = 1 + while key in self.used_keys: + key = f"{key_root}-{suffix}" + suffix += 1 + self.used_keys.add(key) if checkModuleRequired(ts_file.module_name): self.data.append(key) From d31ddeb4c868374f1a3ba2f8fd7e00f2606369d4 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Tue, 5 Jul 2022 17:52:07 +0100 Subject: [PATCH 4/5] Hacky way to check for overloads. Needs work. --- scripts/crowdin_convert.py | 11 ++++++++--- scripts/export_api_ids.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/scripts/crowdin_convert.py b/scripts/crowdin_convert.py index dd32736..3a535e7 100644 --- a/scripts/crowdin_convert.py +++ b/scripts/crowdin_convert.py @@ -47,9 +47,14 @@ def handle_docstring(self, node: ast.AST, name: str) -> None: key_root = ".".join([*self.key, name]) key = key_root suffix = 1 - while key in self.used_keys: - key = f"{key_root}-{suffix}" - suffix += 1 + if isinstance(node, ast.FunctionDef): # ctx.id + for decorator in node.decorator_list: + if hasattr(decorator, "id"): + if decorator.id == "overload": + key = f"{key}-{suffix}" + while key in self.used_keys: + suffix += 1 + key = f"{key_root}-{suffix}" self.used_keys.add(key) self.data.update(get_entries(node, name, key)) diff --git a/scripts/export_api_ids.py b/scripts/export_api_ids.py index 6dea913..12a164f 100644 --- a/scripts/export_api_ids.py +++ b/scripts/export_api_ids.py @@ -73,9 +73,14 @@ def handle_docstring(self, node: ast.AST, name: str) -> None: key_root = ".".join([*self.key, name]) key = key_root suffix = 1 - while key in self.used_keys: - key = f"{key_root}-{suffix}" - suffix += 1 + if isinstance(node, ast.FunctionDef): # ctx.id + for decorator in node.decorator_list: + if hasattr(decorator, "id"): + if decorator.id == "overload": + key = f"{key}-{suffix}" + while key in self.used_keys: + suffix += 1 + key = f"{key_root}-{suffix}" self.used_keys.add(key) if checkModuleRequired(ts_file.module_name): self.data.append(key) From 5f951f64ba81cba654ce936208b8db0b8202ed72 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Wed, 6 Jul 2022 12:49:26 +0100 Subject: [PATCH 5/5] Add builtins. --- scripts/export_api_ids.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/export_api_ids.py b/scripts/export_api_ids.py index 12a164f..5563ead 100644 --- a/scripts/export_api_ids.py +++ b/scripts/export_api_ids.py @@ -17,6 +17,7 @@ ) modules = [ + "builtins", "gc", "log", "machine",