From b1647710a5066009cd9754c8938e73d4b94c1f74 Mon Sep 17 00:00:00 2001 From: Matt Mackay Date: Tue, 26 Jul 2022 16:23:47 -0400 Subject: [PATCH] fix(gazelle): handle purelib and platlib packages that don't set 'Root-Is-Purelib: true' --- gazelle/modules_mapping/generator.py | 61 ++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py index b93f9689ec..8523542aa2 100644 --- a/gazelle/modules_mapping/generator.py +++ b/gazelle/modules_mapping/generator.py @@ -17,26 +17,15 @@ def __init__(self, stderr, output_file): # by looking at the directory structure. def dig_wheel(self, whl): mapping = {} - wheel_name = get_wheel_name(whl) with zipfile.ZipFile(whl, "r") as zip_file: for path in zip_file.namelist(): if is_metadata(path): - continue - ext = pathlib.Path(path).suffix - if ext == ".py" or ext == ".so": - # Note the '/' here means that the __init__.py is not in the - # root of the wheel, therefore we can index the directory - # where this file is as an importable package. - if path.endswith("/__init__.py"): - module = path[: -len("/__init__.py")].replace("/", ".") - mapping[module] = wheel_name - # Always index the module file. - if ext == ".so": - # Also remove extra metadata that is embeded as part of - # the file name as an extra extension. - ext = "".join(pathlib.Path(path).suffixes) - module = path[: -len(ext)].replace("/", ".") - mapping[module] = wheel_name + if data_has_purelib_or_platlib(path): + module_for_path(path, whl, mapping) + else: + continue + else: + module_for_path(path, whl, mapping) return mapping # run is the entrypoint for the generator. @@ -73,6 +62,44 @@ def is_metadata(path): return top_level.endswith(".dist-info") or top_level.endswith(".data") +# The .data is allowed to contain a full purelib or platlib directory +# These get unpacked into site-packages, so require indexing too. 
+# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
+# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
+def data_has_purelib_or_platlib(path):
+    parts = path.split("/")
+    return len(parts) > 1 and is_metadata(path) and (
+        parts[1].lower() in ("purelib", "platlib")
+    )
+
+
+# Records in `mapping` the module (and package) that `path` provides for `whl`.
+def module_for_path(path, whl, mapping):
+    ext = pathlib.Path(path).suffix
+    if ext == ".py" or ext == ".so":
+        # Strip "<top-level>/<purelib|platlib>/" only for real metadata-dir entries;
+        # a substring test would mangle packages that merely contain those words.
+        in_data_lib = data_has_purelib_or_platlib(path)
+        root = "/".join(path.split("/")[2:]) if in_data_lib else path
+
+        wheel_name = get_wheel_name(whl)
+
+        if root.endswith("/__init__.py"):
+            # Note the '/' here means that the __init__.py is not in the
+            # root of the wheel, therefore we can index the directory
+            # where this file is as an importable package.
+            module = root[: -len("/__init__.py")].replace("/", ".")
+            mapping[module] = wheel_name
+
+        # Always index the module file.
+        if ext == ".so":
+            # Also remove extra metadata that is embedded as part of
+            # the file name as an extra extension.
+            ext = "".join(pathlib.Path(root).suffixes)
+        module = root[: -len(ext)].replace("/", ".")
+        mapping[module] = wheel_name
+
+
 if __name__ == "__main__":
     output_file = sys.argv[1]
     wheels = sys.argv[2:]