-
-
Notifications
You must be signed in to change notification settings - Fork 43
Add pip to SBOM at release stage #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,2 @@ | ||
| pytest | ||
| pytest-mock |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,12 +12,16 @@ | |
|
|
||
| import datetime | ||
| import hashlib | ||
| import io | ||
| import json | ||
| import os | ||
| import re | ||
| import subprocess | ||
| import sys | ||
| import tarfile | ||
| import zipfile | ||
| from urllib.request import urlopen | ||
| import typing | ||
|
|
||
|
|
||
| def spdx_id(value: str) -> str: | ||
|
|
@@ -132,6 +136,186 @@ def recursive_sort_in_place(value): | |
| recursive_sort_in_place(sbom_data) | ||
|
|
||
|
|
||
| def fetch_package_metadata_from_pypi(project: str, version: str, filename: str | None = None) -> tuple[str, str] | None: | ||
| """ | ||
| Fetches the SHA256 checksum and download location from PyPI. | ||
| If we're given a filename then we match with that, otherwise we use wheels. | ||
| """ | ||
| # Get the package download URL from PyPI. | ||
| try: | ||
| raw_text = urlopen(f"https://pypi.org/pypi/{project}/{version}/json").read() | ||
| release_metadata = json.loads(raw_text) | ||
| url: dict[str, typing.Any] | ||
|
|
||
| # Look for a matching artifact filename and then check | ||
| # its remote checksum to the local one. | ||
| for url in release_metadata["urls"]: | ||
| # pip can only use Python-only dependencies, so there's | ||
| # no risk of picking the 'incorrect' wheel here. | ||
| if ( | ||
| (filename is None and url["packagetype"] == "bdist_wheel") | ||
| or (filename is not None and url["filename"] == filename) | ||
| ): | ||
| break | ||
| else: | ||
| raise ValueError(f"No matching filename on PyPI for '{filename}'") | ||
|
|
||
| # Successfully found the download URL for the matching artifact. | ||
| download_url = url["url"] | ||
| checksum_sha256 = url["digests"]["sha256"] | ||
| return download_url, checksum_sha256 | ||
|
|
||
| except (OSError, ValueError) as e: | ||
| raise ValueError(f"Couldn't fetch metadata for project '{project}' from PyPI: {e}") | ||
|
|
||
|
|
||
def remove_pip_from_sbom(sbom_data: dict[str, typing.Any]) -> None:
    """
    Removes pip and its direct dependencies from the SBOM data, in place.

    This is only necessary while the SBOM shipped in the CPython source
    may already contain pip entries.

    Both the 'packages' and 'relationships' lists of ``sbom_data`` are
    replaced with filtered copies. A relationship is dropped when either
    of its ends refers to a removed package, so no relationship is left
    pointing at a package that no longer exists.
    """
    sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip")
    sbom_spdx_ids_to_remove = {sbom_pip_spdx_id}

    # Find all package SPDXIDs that pip depends on.
    sbom_spdx_ids_to_remove.update(
        sbom_relationship["relatedSpdxElement"]
        for sbom_relationship in sbom_data["relationships"]
        if (
            sbom_relationship["relationshipType"] == "DEPENDS_ON"
            and sbom_relationship["spdxElementId"] == sbom_pip_spdx_id
        )
    )

    # Remove all the packages...
    sbom_data["packages"] = [
        sbom_package for sbom_package in sbom_data["packages"]
        if sbom_package["SPDXID"] not in sbom_spdx_ids_to_remove
    ]
    # ...and every relationship that starts from or points to one of them.
    sbom_data["relationships"] = [
        sbom_relationship for sbom_relationship in sbom_data["relationships"]
        if (
            sbom_relationship["spdxElementId"] not in sbom_spdx_ids_to_remove
            and sbom_relationship["relatedSpdxElement"] not in sbom_spdx_ids_to_remove
        )
    ]
|
|
||
|
|
||
def _parse_pip_vendor_txt(vendor_txt_data: str) -> list[tuple[str, str]]:
    """
    Parses the text of pip's 'pip/_vendor/vendor.txt' into a list of
    (normalized project name, version) pairs, in file order.

    Comments (anything after '#') and blank lines are ignored.
    Raises ValueError for any other line that isn't a 'name==version' pin.
    """
    # With this version regex we're assuming that pip isn't using pre-releases.
    # Any line that doesn't match is rejected below, so we're safe doing this.
    version_pin_re = re.compile(r"^([a-zA-Z0-9_.-]+)==([0-9.]*[0-9])$")
    pins = []
    for line in vendor_txt_data.splitlines():
        line = line.partition("#")[0].strip()  # Strip comments and whitespace.
        if not line:  # Skip empty lines.
            continue

        # Non-empty lines we must be able to match. This is input validation,
        # so raise explicitly rather than 'assert' (which is stripped by -O).
        match = version_pin_re.match(line)
        if match is None:
            raise ValueError(f"Unparseable line in vendor.txt: {line!r}")

        # Normalize the project name: lowercase, with runs of '-', '_' and '.'
        # collapsed to a single '-'. The name ends up in a 'pkg:pypi/...' purl,
        # which requires lowercase names with '_' replaced by '-'.
        project_name, project_version = match.groups()
        project_name = re.sub(r"[-_.]+", "-", project_name).lower()
        pins.append((project_name, project_version))
    return pins


def create_pip_sbom_from_wheel(
    sbom_data: dict[str, typing.Any],
    pip_wheel_filename: str,
    pip_wheel_bytes: bytes
) -> None:
    """
    pip is a part of a packaging ecosystem (Python, surprise!) so it's actually
    automatable to discover the metadata we need like the version and checksums
    so let's do that on behalf of our friends at the PyPA. This function also
    discovers vendored packages within pip and fetches their metadata.

    ``sbom_data`` is modified in place: any existing pip entries are removed,
    then a package entry is appended for pip and for each project pinned in
    the wheel's 'pip/_vendor/vendor.txt', along with 'pip DEPENDS_ON <project>'
    and 'CPython DEPENDS_ON pip' relationships.

    Raises ValueError if the local wheel's SHA256 doesn't match the one PyPI
    reports for ``pip_wheel_filename``, if 'vendor.txt' contains a line that
    can't be parsed, or if metadata for any project can't be fetched.
    """
    # Remove pip from the SBOM in case it's included in the CPython source code SBOM.
    remove_pip_from_sbom(sbom_data)

    # Wheel filename format puts the version right after the project name.
    pip_version = pip_wheel_filename.split("-")[1]
    pip_checksum_sha256 = hashlib.sha256(pip_wheel_bytes).hexdigest()

    # Compare the checksum of the bundled wheel to what PyPI published.
    pip_download_url, pip_actual_sha256 = fetch_package_metadata_from_pypi(
        project="pip",
        version=pip_version,
        filename=pip_wheel_filename,
    )
    if pip_actual_sha256 != pip_checksum_sha256:
        raise ValueError("pip wheel checksum doesn't match PyPI")

    # Parse 'pip/_vendor/vendor.txt' from the wheel for sub-dependencies.
    # Every line is validated before any of the projects is looked up on PyPI.
    with zipfile.ZipFile(io.BytesIO(pip_wheel_bytes)) as whl:
        vendor_txt_data = whl.read("pip/_vendor/vendor.txt").decode()
    vendored_projects = _parse_pip_vendor_txt(vendor_txt_data)

    sbom_pip_dependency_spdx_ids = set()
    for project_name, project_version in vendored_projects:
        # Fetch the metadata from PyPI
        project_download_url, project_checksum_sha256 = (
            fetch_package_metadata_from_pypi(project_name, project_version)
        )

        # Update our SBOM data with what we received from PyPI.
        sbom_project_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{project_name}")
        sbom_pip_dependency_spdx_ids.add(sbom_project_spdx_id)
        sbom_data["packages"].append({
            "SPDXID": sbom_project_spdx_id,
            "name": project_name,
            "versionInfo": project_version,
            "downloadLocation": project_download_url,
            "checksums": [
                {"algorithm": "SHA256", "checksumValue": project_checksum_sha256}
            ],
            "externalRefs": [
                {
                    "referenceCategory": "PACKAGE_MANAGER",
                    "referenceLocator": f"pkg:pypi/{project_name}@{project_version}",
                    "referenceType": "purl",
                },
            ],
            "primaryPackagePurpose": "SOURCE",
            "licenseConcluded": "NOASSERTION",
        })

    # Now we add pip to the SBOM and dependency relationships
    sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip")
    sbom_data["packages"].append(
        {
            "SPDXID": sbom_pip_spdx_id,
            "name": "pip",
            "versionInfo": pip_version,
            "originator": "Organization: Python Packaging Authority",
            "licenseConcluded": "NOASSERTION",
            "downloadLocation": pip_download_url,
            "checksums": [
                {"algorithm": "SHA256", "checksumValue": pip_checksum_sha256}
            ],
            "externalRefs": [
                {
                    "referenceCategory": "SECURITY",
                    "referenceLocator": f"cpe:2.3:a:pypa:pip:{pip_version}:*:*:*:*:*:*:*",
                    "referenceType": "cpe23Type",
                },
                {
                    "referenceCategory": "PACKAGE_MANAGER",
                    "referenceLocator": f"pkg:pypi/pip@{pip_version}",
                    "referenceType": "purl",
                },
            ],
            "primaryPackagePurpose": "SOURCE",
        }
    )
    # Sorted so the relationships are appended in a stable order.
    for sbom_dep_spdx_id in sorted(sbom_pip_dependency_spdx_ids):
        sbom_data["relationships"].append({
            "spdxElementId": sbom_pip_spdx_id,
            "relatedSpdxElement": sbom_dep_spdx_id,
            "relationshipType": "DEPENDS_ON"
        })

    # Finally, CPython depends on pip.
    sbom_data["relationships"].append(
        {
            "spdxElementId": "SPDXRef-PACKAGE-cpython",
            "relatedSpdxElement": sbom_pip_spdx_id,
            "relationshipType": "DEPENDS_ON",
        }
    )
|
|
||
|
|
||
| def create_sbom_for_source_tarball(tarball_path: str): | ||
| """Stitches together an SBOM for a source tarball""" | ||
| tarball_name = os.path.basename(tarball_path) | ||
|
|
@@ -163,9 +347,9 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
| "Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'" | ||
| ) from None | ||
| sbom_bytes = tarball.extractfile(sbom_tarball_member).read() | ||
| sbom_data = json.loads(sbom_bytes) | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changed this variable name to match everywhere in this script. |
||
|
|
||
| sbom = json.loads(sbom_bytes) | ||
| sbom.update({ | ||
| sbom_data.update({ | ||
| "SPDXID": "SPDXRef-DOCUMENT", | ||
| "spdxVersion": "SPDX-2.3", | ||
| "name": "CPython SBOM", | ||
|
|
@@ -211,18 +395,36 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
| } | ||
|
|
||
| # The top-level CPython package depends on every vendored sub-package. | ||
| for sbom_package in sbom["packages"]: | ||
| sbom["relationships"].append({ | ||
| for sbom_package in sbom_data["packages"]: | ||
| sbom_data["relationships"].append({ | ||
| "spdxElementId": sbom_cpython_package["SPDXID"], | ||
| "relatedSpdxElement": sbom_package["SPDXID"], | ||
| "relationshipType": "DEPENDS_ON", | ||
| }) | ||
|
|
||
| sbom["packages"].append(sbom_cpython_package) | ||
| sbom_data["packages"].append(sbom_cpython_package) | ||
|
|
||
| # Find the pip wheel in ensurepip in the tarball | ||
| for member in tarball.getmembers(): | ||
| match = re.match(rf"^Python-{cpython_version}/Lib/ensurepip/_bundled/(pip-.*\.whl)$", member.name) | ||
| if match is not None: | ||
| pip_wheel_filename = match.group(1) | ||
| pip_wheel_bytes = tarball.extractfile(member).read() | ||
| break | ||
| else: | ||
| raise ValueError("Could not find pip wheel in 'Lib/ensurepip/_bundled/...'") | ||
|
|
||
| # Now add pip to the SBOM. We do this after the above step to avoid | ||
| # CPython being dependent on packages that pip is dependent on. | ||
| create_pip_sbom_from_wheel( | ||
| sbom_data=sbom_data, | ||
| pip_wheel_filename=pip_wheel_filename, | ||
| pip_wheel_bytes=pip_wheel_bytes | ||
| ) | ||
|
|
||
| # Extract all currently known files from the SBOM with their checksums. | ||
| known_sbom_files = {} | ||
| for sbom_file in sbom["files"]: | ||
| for sbom_file in sbom_data["files"]: | ||
| sbom_filename = sbom_file["fileName"] | ||
|
|
||
| # Look for the expected SHA256 checksum. | ||
|
|
@@ -267,7 +469,7 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
| # If this is a new file, then it's a part of the 'CPython' SBOM package. | ||
| else: | ||
| sbom_file_spdx_id = spdx_id(f"SPDXRef-FILE-{member_name_no_prefix}") | ||
| sbom["files"].append( | ||
| sbom_data["files"].append( | ||
| { | ||
| "SPDXID": sbom_file_spdx_id, | ||
| "fileName": member_name_no_prefix, | ||
|
|
@@ -283,7 +485,7 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
| ], | ||
| } | ||
| ) | ||
| sbom["relationships"].append( | ||
| sbom_data["relationships"].append( | ||
| { | ||
| "spdxElementId": sbom_cpython_package["SPDXID"], | ||
| "relatedSpdxElement": sbom_file_spdx_id, | ||
|
|
@@ -300,7 +502,7 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
| ) | ||
|
|
||
| # Final relationship, this SBOM describes the CPython package. | ||
| sbom["relationships"].append( | ||
| sbom_data["relationships"].append( | ||
| { | ||
| "spdxElementId": "SPDXRef-DOCUMENT", | ||
| "relatedSpdxElement": sbom_cpython_package["SPDXID"], | ||
|
|
@@ -310,17 +512,17 @@ def create_sbom_for_source_tarball(tarball_path: str): | |
|
|
||
| # Apply the 'supplier' tag to every package since we're shipping | ||
| # the package in the tarball itself. Originator field is used for maintainers. | ||
| for sbom_package in sbom["packages"]: | ||
| for sbom_package in sbom_data["packages"]: | ||
| sbom_package["supplier"] = "Organization: Python Software Foundation" | ||
| sbom_package["filesAnalyzed"] = True | ||
|
|
||
| # Calculate the 'packageVerificationCode' values for files in packages. | ||
| calculate_package_verification_codes(sbom) | ||
| calculate_package_verification_codes(sbom_data) | ||
|
|
||
| # Normalize SBOM structures for reproducibility. | ||
| normalize_sbom_data(sbom) | ||
| normalize_sbom_data(sbom_data) | ||
|
|
||
| return sbom | ||
| return sbom_data | ||
|
|
||
|
|
||
| def main() -> None: | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll be able to remove this function once I remove pip from the SBOM in the CPython source code.