From 35c8a53216ad873a676b2365f6b98cb1c2f708b5 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 5 Mar 2024 13:30:12 +1000 Subject: [PATCH 01/25] chore: update provenance_payload in __main__. Signed-off-by: Ben Selwyn-Smith --- src/macaron/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index ad70156f3..3323f418e 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -142,7 +142,7 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None run_config, analyzer_single_args.sbom_path, analyzer_single_args.skip_deps, - prov_payload=prov_payload, + provenance_payload=prov_payload, ) sys.exit(status_code) From 321c21767d2efb5762b8f7b67038a3e8a0eebdff Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 5 Mar 2024 14:14:50 +1000 Subject: [PATCH 02/25] chore: ensure SLSA v0.1 list index is within the bounds of the associated list. Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 src/macaron/repo_finder/provenance_extractor.py diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py new file mode 100644 index 000000000..36ed813ed --- /dev/null +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -0,0 +1,276 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains methods for extracting repository and commit metadata from provenance files.""" +import logging +from typing import overload + +from macaron.slsa_analyzer.provenance import intoto +from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload +from macaron.util import JsonType + +logger: logging.Logger = logging.getLogger(__name__) + + +def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str, str]: + """Extract the repository and commit metadata from the passed provenance payload. + + Parameters + ---------- + payload: InTotoPayload + The payload to extract from. + + Returns + ------- + tuple[str, str] + The repository URL and commit hash if found, a pair of empty strings otherwise. + """ + predicate_type = payload.statement.get("predicateType") + if isinstance(payload, InTotoV1Payload): + if isinstance(payload, InTotoV1Payload): + if predicate_type == "https://slsa.dev/provenance/v1": + return _extract_from_slsa_v1(payload) + elif isinstance(payload, InTotoV01Payload): + if predicate_type == "https://slsa.dev/provenance/v0.2": + return _extract_from_slsa_v02(payload) + if predicate_type == "https://slsa.dev/provenance/v0.1": + return _extract_from_slsa_v01(payload) + if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1": + return _extract_from_witness_provenance(payload) + + logger.debug( + "Extraction from provenance not supported for versions: predicate_type %s, in-toto %s.", + predicate_type, + payload.__class__, + ) + return "", "" + + +def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: + """Extract the repository and commit metadata from the slsa v01 provenance payload.""" + predicate: dict[str, JsonType] | None = payload.statement.get("predicate") + if not predicate: + return "", "" + + # The repository URL and commit are stored inside an entry in the list of predicate -> materials. 
+ # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry. + list_index = _json_extract(predicate, ["recipe", "definedInMaterial"], int) + if not list_index: + return "", "" + + material_list = _json_extract(predicate, ["materials"], list) + if not material_list: + return "", "" + + if list_index >= len(material_list): + return "", "" + material = material_list[list_index] + if not material or not isinstance(material, dict): + return "", "" + + uri = material.get("uri") + if not uri: + logger.debug("Could not extract repository URL.") + repo = _clean_spdx(uri) + + digest_set = material.get("digest") + if not digest_set or not isinstance(digest_set, dict): + return "", "" + commit = _extract_commit_from_digest(digest_set) + if not commit: + logger.debug("Could not extract commit.") + return "", "" + + return repo, commit + + +def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: + """Extract the repository and commit metadata from the slsa v02 provenance payload.""" + predicate: dict[str, JsonType] | None = payload.statement.get("predicate") + if not predicate: + return "", "" + + # The repository URL and commit are stored within the predicate -> invocation -> configSource object. 
+ # See https://slsa.dev/spec/v0.2/provenance + uri = _json_extract(predicate, ["invocation", "configSource", "uri"], str) + if not uri: + logger.debug("Could not extract repo URL.") + return "", "" + repo = _clean_spdx(uri) + + digest_set = _json_extract(predicate, ["invocation", "configSource", "digest"], dict) + if not digest_set: + return "", "" + commit = _extract_commit_from_digest(digest_set) + if not commit: + logger.debug("Could not extract commit.") + return "", "" + + return repo, commit + + +def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: + """Extract the repository and commit metadata from the slsa v1 provenance payload.""" + predicate: dict[str, JsonType] | None = payload.statement.get("predicate") + if not predicate: + return "", "" + + build_def = _json_extract(predicate, ["buildDefinition"], dict) + if not build_def: + return "", "" + build_type = _json_extract(build_def, ["buildType"], str) + if not build_type: + return "", "" + + # Extract the repository URL. + repo = None + if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": + repo = _json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) + if not repo: + repo = _json_extract(build_def, ["externalParameters", "configSource", "repository"], str) + if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": + repo = _json_extract(build_def, ["externalParameters", "workflow", "repository"], str) + + if not repo: + logger.debug("Failed to extract repository URL from provenance.") + return "", "" + + # Extract the commit hash. 
+ commit = None + deps = _json_extract(build_def, ["resolvedDependencies"], list) + if not deps: + return "", "" + for dep in deps: + if not isinstance(dep, dict): + continue + uri = dep["uri"] + url = _clean_spdx(uri) + if url != repo: + continue + if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": + commit_dict = _json_extract(dep, ["digest"], dict) + if not commit_dict: + continue + commit = _extract_commit_from_digest(commit_dict) + if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": + commit = _json_extract(dep, ["digest", "gitCommit"], str) + + if not commit: + logger.debug("Failed to extract commit hash from provenance.") + return "", "" + + return repo, commit + + +def _extract_commit_from_digest(digest: dict[str, JsonType]) -> str | None: + """Extract the commit from the passed DigestSet. + + The DigestSet is an in-toto object that maps algorithm types to commit hashes (digests). + """ + # TODO decide on a preference for which algorithm to accept. + if len(digest.keys()) > 1: + logger.debug("DigestSet contains multiple algorithms: %s", digest.keys()) + + for key in digest: + if key in intoto.v1.VALID_ALGORITHMS: + value = digest.get(key) + if isinstance(value, str): + return value + return None + + +def _clean_spdx(uri: str) -> str: + """Clean the passed SPDX URI and return the normalised URL it represents. + + A SPDX URI has the form: git+https://example.com@refs/heads/main + """ + url, _, _ = uri.lstrip("git+").rpartition("@") + return url + + +def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, str]: + """Extract the repository and commit metadata from the witness provenance file found at the passed path. + + To successfully return the commit and repository URL, the payload must respectively contain a Git attestation, and + either a GitHub or GitLab attestation. + + Parameters + ---------- + payload: InTotoPayload + The payload to extract from. 
+ + Returns + ------- + tuple[str, str] + The repository URL and commit hash if found, a pair of empty strings otherwise. + """ + predicate: dict[str, JsonType] | None = payload.statement.get("predicate") + if not predicate: + return "", "" + attestations = _json_extract(predicate, ["attestations"], list) + if not attestations: + return "", "" + commit: str | None = None + repo: str | None = None + for entry in attestations: + if not isinstance(entry, dict): + continue + entry_type = entry.get("type") + if not entry_type: + continue + if entry_type.startswith("https://witness.dev/attestations/git/"): + commit = _json_extract(entry, ["attestation", "commithash"], str) + elif entry_type.startswith("https://witness.dev/attestations/gitlab/") or entry_type.startswith( + "https://witness.dev/attestations/github/" + ): + repo = _json_extract(entry, ["attestation", "projecturl"], str) + + if not commit or not repo: + logger.debug("Could not extract repo and commit from provenance.") + return "", "" + + return repo, commit + + +@overload +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[int]) -> int | None: + ... + + +@overload +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[list]) -> list | None: + ... + + +@overload +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[dict]) -> dict | None: + ... + + +@overload +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[str]) -> str | None: + ... + + +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[JsonType]) -> JsonType: + """Return the value found by following the list of depth-sequential keys inside the passed dictionary. + + The value's type is validated against the passed type. 
+ """ + target = entry + for index, key in enumerate(keys): + if key not in target: + logger.debug("Key not found in JSON: %s", key) + return None + next_target = target[key] + if index == len(keys) - 1: + if isinstance(next_target, type_): + return next_target + else: + if not isinstance(next_target, dict): + logger.debug("Expected dict found: %s", next_target.__class__) + break + target = next_target + + logger.debug("Failed to find %s in JSON dictionary", " > ".join(keys)) + return None From 6c95043b7f992b81f1fe33387309b026c92c6e33 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 5 Mar 2024 16:37:13 +1000 Subject: [PATCH 03/25] chore: keep code related to with statement in the statement block; add debug output for provenance extractor success Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 28 ++- src/macaron/repo_finder/provenance_finder.py | 232 ++++++++++++++++++ 2 files changed, 249 insertions(+), 11 deletions(-) create mode 100644 src/macaron/repo_finder/provenance_finder.py diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 36ed813ed..7fca2c82e 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -25,25 +25,31 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str tuple[str, str] The repository URL and commit hash if found, a pair of empty strings otherwise. 
""" + repo = "" + commit = "" predicate_type = payload.statement.get("predicateType") if isinstance(payload, InTotoV1Payload): if isinstance(payload, InTotoV1Payload): if predicate_type == "https://slsa.dev/provenance/v1": - return _extract_from_slsa_v1(payload) + repo, commit = _extract_from_slsa_v1(payload) elif isinstance(payload, InTotoV01Payload): if predicate_type == "https://slsa.dev/provenance/v0.2": - return _extract_from_slsa_v02(payload) + repo, commit = _extract_from_slsa_v02(payload) if predicate_type == "https://slsa.dev/provenance/v0.1": - return _extract_from_slsa_v01(payload) + repo, commit = _extract_from_slsa_v01(payload) if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1": - return _extract_from_witness_provenance(payload) - - logger.debug( - "Extraction from provenance not supported for versions: predicate_type %s, in-toto %s.", - predicate_type, - payload.__class__, - ) - return "", "" + repo, commit = _extract_from_witness_provenance(payload) + + if not repo or not commit: + logger.debug( + "Extraction from provenance not supported for versions: predicate_type %s, in-toto %s.", + predicate_type, + payload.__class__, + ) + return "", "" + + logger.debug("Extracted repo and commit from provenance: %s, %s", repo, commit) + return repo, commit def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: diff --git a/src/macaron/repo_finder/provenance_finder.py b/src/macaron/repo_finder/provenance_finder.py new file mode 100644 index 000000000..42463f13d --- /dev/null +++ b/src/macaron/repo_finder/provenance_finder.py @@ -0,0 +1,232 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains methods for finding provenance files.""" +import logging +import os +import tempfile + +from packageurl import PackageURL + +from macaron.config.defaults import defaults +from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type +from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException +from macaron.slsa_analyzer.package_registry import JFrogMavenRegistry, NPMRegistry +from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset +from macaron.slsa_analyzer.provenance.intoto import InTotoPayload +from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError +from macaron.slsa_analyzer.provenance.loader import load_provenance_payload +from macaron.slsa_analyzer.provenance.witness import is_witness_provenance_payload, load_witness_verifier_config + +logger: logging.Logger = logging.getLogger(__name__) + + +class ProvenanceFinder: + """This class is used to find and retrieve provenance files from supported registries.""" + + def __init__(self) -> None: + self.last_provenance_payload: InTotoPayload | None = None + + def find_provenance(self, purl: PackageURL) -> InTotoPayload | None: + """Find the provenance files of the passed PURL. + + Parameters + ---------- + purl: PackageURL + The PURL to find provenance for. + + Returns + ------- + InTotoPayload | None + The provenance payload if found, or None. + """ + if determine_abstract_purl_type(purl) == AbstractPurlType.REPOSITORY: + # Do not perform this function for repository type targets. 
+ self.last_provenance_payload = None + + if purl.type == "npm": + self.last_provenance_payload = ProvenanceFinder.find_npm_provenance(purl) + elif purl.type in ["gradle", "maven"]: + self.last_provenance_payload = ProvenanceFinder.find_gav_provenance(purl) + else: + logger.debug("Provenance finding not supported for PURL type: %s", purl.type) + self.last_provenance_payload = None + + return self.last_provenance_payload + + @staticmethod + def find_npm_provenance(purl: PackageURL) -> InTotoPayload | None: + """Find and download the NPM based provenance for the passed PURL. + + Parameters + ---------- + purl: PackageURL + The PURL of the analysis target. + + Returns + ------- + InTotoPayload | None + The provenance payload if found, or None. + """ + # Retrieve NPM registry configuration values. + npm_section = "package_registry.npm" + if not defaults.has_section(npm_section): + logger.debug("No NPM section found in config.") + return None + if not defaults.get(npm_section, "enabled"): + logger.debug("NPM section disabled in config.") + return None + + hostname = defaults.get(npm_section, "hostname") + attestation_endpoint = defaults.get(npm_section, "attestation_endpoint") + try: + request_timeout = int(defaults.get(npm_section, "request_timeout")) + except ValueError as error: + logger.debug("Invalid value for NPM package registry timeout: %s", error) + return None + # Create registry from configuration values. + npm_registry = NPMRegistry(hostname, attestation_endpoint, request_timeout) + + namespace = purl.namespace or "" + artifact_id = purl.name + version = purl.version + + if not purl.version: + version = npm_registry.get_latest_version(namespace, artifact_id) + + if not version: + logger.debug("Missing version for NPM package.") + return None + + # The size of the asset (in bytes) is added to match the AssetLocator + # protocol and is not used because npm API registry does not provide it, so it is set to zero. 
+ npm_provenance_asset = NPMAttestationAsset( + namespace=namespace, + artifact_id=artifact_id, + version=version, + npm_registry=npm_registry, + size_in_bytes=0, + ) + try: + with tempfile.TemporaryDirectory() as temp_dir: + download_path = os.path.join(temp_dir, f"{artifact_id}.intoto.jsonl") + if not npm_provenance_asset.download(download_path): + logger.debug("Unable to find an npm provenance for %s@%s", artifact_id, version) + return None + + try: + # Load the provenance file. + provenance_payload = load_provenance_payload(download_path) + except LoadIntotoAttestationError as loadintotoerror: + logger.error("Error while loading provenance %s", loadintotoerror) + return None + + return provenance_payload + except OSError as error: + logger.error("Error while storing provenance in the temporary directory: %s", error) + return None + + @staticmethod + def find_gav_provenance(purl: PackageURL) -> InTotoPayload | None: + """Find and download the GAV based provenance for the passed PURL. + + Parameters + ---------- + purl: PackageURL + The PURL of the analysis target. + + Returns + ------- + InTotoPayload | None + The provenance payload if found, or None. 
+ + """ + jfrog_section = "package_registry.jfrog.maven" + if not defaults.has_section(jfrog_section): + logger.debug("No JFrog section found in config.") + return None + + try: + request_timeout = defaults.getint(jfrog_section, "request_timeout") + download_timeout = defaults.getint(jfrog_section, "download_timeout") + except ValueError as error: + logger.debug("Failed to parse default value as int: %s", error) + return None + + jfrog_registry = JFrogMavenRegistry( + defaults.get(jfrog_section, "hostname"), + defaults.get(jfrog_section, "repo"), + request_timeout, + download_timeout, + ) + + provenance_extensions = defaults.get_list( + "slsa.verifier", + "provenance_extensions", + fallback=["intoto.jsonl"], + ) + + provenance_assets = jfrog_registry.fetch_assets( + group_id=purl.namespace if purl.namespace else "", + artifact_id=purl.name, + version=purl.version if purl.version else "", + extensions=set(provenance_extensions), + ) + + if not provenance_assets: + return None + + max_valid_provenance_size = defaults.getint( + "slsa.verifier", + "max_download_size", + fallback=1000000, + ) + + for provenance_asset in provenance_assets: + if provenance_asset.size_in_bytes > max_valid_provenance_size: + msg = ( + f"The provenance asset {provenance_asset.name} unexpectedly exceeds the " + f"max valid file size of {max_valid_provenance_size} (bytes). " + "The check will not proceed due to potential security risks." + ) + logger.error(msg) + raise ProvenanceAvailableException(msg) + + provenance_filepaths = [] + try: + with tempfile.TemporaryDirectory() as temp_dir: + for provenance_asset in provenance_assets: + provenance_filepath = os.path.join(temp_dir, provenance_asset.name) + if not provenance_asset.download(provenance_filepath): + logger.debug( + "Could not download the provenance %s. 
Skip verifying...", + provenance_asset.name, + ) + continue + provenance_filepaths.append(provenance_filepath) + except OSError as error: + logger.error("Error while storing provenance in the temporary directory: %s", error) + + provenances = [] + witness_verifier_config = load_witness_verifier_config() + + for provenance_filepath in provenance_filepaths: + try: + provenance_payload = load_provenance_payload(provenance_filepath) + except LoadIntotoAttestationError as error: + logger.error("Error while loading provenance: %s", error) + continue + + if not is_witness_provenance_payload(provenance_payload, witness_verifier_config.predicate_types): + continue + + provenances.append(provenance_payload) + + if not provenances: + logger.debug("No payloads found in provenance files.") + return None + + # TODO decide what to do when multiple provenance payloads are present. + provenance = provenances[0] + + return provenance From fa1bbc7d1fd2ecc01ac0872bac00d62a9ba22329 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 7 Mar 2024 09:46:07 +1000 Subject: [PATCH 04/25] chore: replace overload with TypeVar Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 7fca2c82e..43db88e35 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -3,7 +3,7 @@ """This module contains methods for extracting repository and commit metadata from provenance files.""" import logging -from typing import overload +from typing import TypeVar from macaron.slsa_analyzer.provenance import intoto from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload @@ -238,27 +238,10 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st return repo, commit 
-@overload -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[int]) -> int | None: - ... +T = TypeVar("T", bound=JsonType) -@overload -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[list]) -> list | None: - ... - - -@overload -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[dict]) -> dict | None: - ... - - -@overload -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[str]) -> str | None: - ... - - -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[JsonType]) -> JsonType: +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T | None: """Return the value found by following the list of depth-sequential keys inside the passed dictionary. The value's type is validated against the passed type. From e1ffe975be5987e012781380b38384b5c7de70cc Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 8 Mar 2024 13:26:57 +1000 Subject: [PATCH 05/25] chore: remove duplicate if statement; replace x.__class__ with str(type(x)); use default JFrog registry; only pass real values to JFrog fetch function; rename digest function to digest_set; copy intoto algorithms to v01, and add as input to _extract_commit_from_digest_set function; make provenance_extractor raise exceptions instead of returning empty tuples, and refactor accordingly; add gitCommit digest set type to v1 algorithms. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 171 ++++++++---------- src/macaron/repo_finder/provenance_finder.py | 79 ++++---- src/macaron/repo_finder/repo_finder.py | 2 +- src/macaron/slsa_analyzer/analyzer.py | 70 +++++-- .../provenance/intoto/v01/__init__.py | 25 +++ .../provenance/intoto/v1/__init__.py | 6 +- 6 files changed, 198 insertions(+), 155 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 43db88e35..59196ec44 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -5,6 +5,7 @@ import logging from typing import TypeVar +from macaron.errors import MacaronError from macaron.slsa_analyzer.provenance import intoto from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload from macaron.util import JsonType @@ -12,6 +13,10 @@ logger: logging.Logger = logging.getLogger(__name__) +class ProvenanceExtractionException(MacaronError): + """When there is an error while extracting from provenance.""" + + def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str, str]: """Extract the repository and commit metadata from the passed provenance payload. @@ -24,14 +29,18 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str ------- tuple[str, str] The repository URL and commit hash if found, a pair of empty strings otherwise. + + Raises + ------ + ProvenanceExtractionException + If the extraction process fails for any reason. 
""" repo = "" commit = "" predicate_type = payload.statement.get("predicateType") if isinstance(payload, InTotoV1Payload): - if isinstance(payload, InTotoV1Payload): - if predicate_type == "https://slsa.dev/provenance/v1": - repo, commit = _extract_from_slsa_v1(payload) + if predicate_type == "https://slsa.dev/provenance/v1": + repo, commit = _extract_from_slsa_v1(payload) elif isinstance(payload, InTotoV01Payload): if predicate_type == "https://slsa.dev/provenance/v0.2": repo, commit = _extract_from_slsa_v02(payload) @@ -41,12 +50,12 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str repo, commit = _extract_from_witness_provenance(payload) if not repo or not commit: - logger.debug( - "Extraction from provenance not supported for versions: predicate_type %s, in-toto %s.", - predicate_type, - payload.__class__, + msg = ( + f"Extraction from provenance not supported for versions: " + f"predicate_type {predicate_type}, in-toto {str(type(payload))}." ) - return "", "" + logger.error(msg) + raise ProvenanceExtractionException(msg) logger.debug("Extracted repo and commit from provenance: %s, %s", repo, commit) return repo, commit @@ -56,36 +65,23 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v01 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - return "", "" + raise ProvenanceExtractionException("No predicate in payload statement.") # The repository URL and commit are stored inside an entry in the list of predicate -> materials. # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry. 
list_index = _json_extract(predicate, ["recipe", "definedInMaterial"], int) - if not list_index: - return "", "" - material_list = _json_extract(predicate, ["materials"], list) - if not material_list: - return "", "" - if list_index >= len(material_list): - return "", "" + raise ProvenanceExtractionException("Material list index outside of material list bounds.") material = material_list[list_index] if not material or not isinstance(material, dict): - return "", "" + raise ProvenanceExtractionException("Indexed material list entry is invalid.") - uri = material.get("uri") - if not uri: - logger.debug("Could not extract repository URL.") + uri = _json_extract(material, ["uri"], str) repo = _clean_spdx(uri) - digest_set = material.get("digest") - if not digest_set or not isinstance(digest_set, dict): - return "", "" - commit = _extract_commit_from_digest(digest_set) - if not commit: - logger.debug("Could not extract commit.") - return "", "" + digest_set = _json_extract(material, ["digest"], dict) + commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) return repo, commit @@ -94,23 +90,15 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v02 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - return "", "" + raise ProvenanceExtractionException("No predicate in payload statement.") # The repository URL and commit are stored within the predicate -> invocation -> configSource object. 
# See https://slsa.dev/spec/v0.2/provenance uri = _json_extract(predicate, ["invocation", "configSource", "uri"], str) - if not uri: - logger.debug("Could not extract repo URL.") - return "", "" repo = _clean_spdx(uri) digest_set = _json_extract(predicate, ["invocation", "configSource", "digest"], dict) - if not digest_set: - return "", "" - commit = _extract_commit_from_digest(digest_set) - if not commit: - logger.debug("Could not extract commit.") - return "", "" + commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) return repo, commit @@ -119,81 +107,43 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v1 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - return "", "" + raise ProvenanceExtractionException("No predicate in payload statement.") build_def = _json_extract(predicate, ["buildDefinition"], dict) - if not build_def: - return "", "" build_type = _json_extract(build_def, ["buildType"], str) - if not build_type: - return "", "" # Extract the repository URL. 
- repo = None + repo = "" if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": - repo = _json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) - if not repo: + try: + repo = _json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) + except ProvenanceExtractionException: repo = _json_extract(build_def, ["externalParameters", "configSource", "repository"], str) if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": repo = _json_extract(build_def, ["externalParameters", "workflow", "repository"], str) if not repo: - logger.debug("Failed to extract repository URL from provenance.") - return "", "" + raise ProvenanceExtractionException("Failed to extract repository URL from provenance.") # Extract the commit hash. - commit = None + commit = "" deps = _json_extract(build_def, ["resolvedDependencies"], list) - if not deps: - return "", "" for dep in deps: if not isinstance(dep, dict): continue - uri = dep["uri"] + uri = _json_extract(dep, ["uri"], str) url = _clean_spdx(uri) if url != repo: continue - if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": - commit_dict = _json_extract(dep, ["digest"], dict) - if not commit_dict: - continue - commit = _extract_commit_from_digest(commit_dict) - if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": - commit = _json_extract(dep, ["digest", "gitCommit"], str) + digest_set = _json_extract(dep, ["digest"], dict) + commit = _extract_commit_from_digest_set(digest_set, intoto.v1.VALID_ALGORITHMS) if not commit: - logger.debug("Failed to extract commit hash from provenance.") - return "", "" + raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") return repo, commit -def _extract_commit_from_digest(digest: dict[str, JsonType]) -> str | None: - """Extract the commit from the passed DigestSet. 
- - The DigestSet is an in-toto object that maps algorithm types to commit hashes (digests). - """ - # TODO decide on a preference for which algorithm to accept. - if len(digest.keys()) > 1: - logger.debug("DigestSet contains multiple algorithms: %s", digest.keys()) - - for key in digest: - if key in intoto.v1.VALID_ALGORITHMS: - value = digest.get(key) - if isinstance(value, str): - return value - return None - - -def _clean_spdx(uri: str) -> str: - """Clean the passed SPDX URI and return the normalised URL it represents. - - A SPDX URI has the form: git+https://example.com@refs/heads/main - """ - url, _, _ = uri.lstrip("git+").rpartition("@") - return url - - def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the witness provenance file found at the passed path. @@ -212,12 +162,11 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st """ predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - return "", "" + raise ProvenanceExtractionException("No predicate in payload statement.") + attestations = _json_extract(predicate, ["attestations"], list) - if not attestations: - return "", "" - commit: str | None = None - repo: str | None = None + commit = "" + repo = "" for entry in attestations: if not isinstance(entry, dict): continue @@ -232,16 +181,41 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st repo = _json_extract(entry, ["attestation", "projecturl"], str) if not commit or not repo: - logger.debug("Could not extract repo and commit from provenance.") - return "", "" + raise ProvenanceExtractionException("Could not extract repo and commit from provenance.") return repo, commit +def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algorithms: list[str]) -> str: + """Extract the commit from the passed DigestSet. 
+ + The DigestSet is an in-toto object that maps algorithm types to commit hashes (digests). + """ + # TODO decide on a preference for which algorithm to accept. + if len(digest_set.keys()) > 1: + logger.debug("DigestSet contains multiple algorithms: %s", digest_set.keys()) + + for key in digest_set: + if key in valid_algorithms: + value = digest_set.get(key) + if isinstance(value, str): + return value + raise ProvenanceExtractionException("No valid digest in digest set.") + + +def _clean_spdx(uri: str) -> str: + """Clean the passed SPDX URI and return the normalised URL it represents. + + A SPDX URI has the form: git+https://example.com@refs/heads/main + """ + url, _, _ = uri.lstrip("git+").rpartition("@") + return url + + T = TypeVar("T", bound=JsonType) -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T | None: +def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T: """Return the value found by following the list of depth-sequential keys inside the passed dictionary. The value's type is validated against the passed type. 
@@ -249,17 +223,14 @@ def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) - target = entry for index, key in enumerate(keys): if key not in target: - logger.debug("Key not found in JSON: %s", key) - return None + raise ProvenanceExtractionException(f"JSON key not found: {key}") next_target = target[key] if index == len(keys) - 1: if isinstance(next_target, type_): return next_target else: if not isinstance(next_target, dict): - logger.debug("Expected dict found: %s", next_target.__class__) - break + raise ProvenanceExtractionException(f"Extract value from non-dict type: {str(type(next_target))}") target = next_target - logger.debug("Failed to find %s in JSON dictionary", " > ".join(keys)) - return None + raise ProvenanceExtractionException(f"Failed to find '{' > '.join(keys)}' as type '{type_}' in JSON dictionary.") diff --git a/src/macaron/repo_finder/provenance_finder.py b/src/macaron/repo_finder/provenance_finder.py index 42463f13d..957092593 100644 --- a/src/macaron/repo_finder/provenance_finder.py +++ b/src/macaron/repo_finder/provenance_finder.py @@ -11,7 +11,7 @@ from macaron.config.defaults import defaults from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException -from macaron.slsa_analyzer.package_registry import JFrogMavenRegistry, NPMRegistry +from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, JFrogMavenRegistry, NPMRegistry from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset from macaron.slsa_analyzer.provenance.intoto import InTotoPayload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError @@ -26,6 +26,15 @@ class ProvenanceFinder: def __init__(self) -> None: self.last_provenance_payload: InTotoPayload | None = None + registries = PACKAGE_REGISTRIES + self.npm_registry: NPMRegistry | None = None + 
self.jfrog_registry: JFrogMavenRegistry | None = None + if registries: + for registry in registries: + if isinstance(registry, NPMRegistry): + self.npm_registry = registry + elif isinstance(registry, JFrogMavenRegistry): + self.jfrog_registry = registry def find_provenance(self, purl: PackageURL) -> InTotoPayload | None: """Find the provenance files of the passed PURL. @@ -42,12 +51,20 @@ def find_provenance(self, purl: PackageURL) -> InTotoPayload | None: """ if determine_abstract_purl_type(purl) == AbstractPurlType.REPOSITORY: # Do not perform this function for repository type targets. - self.last_provenance_payload = None + return None + + self.last_provenance_payload = None if purl.type == "npm": - self.last_provenance_payload = ProvenanceFinder.find_npm_provenance(purl) + if self.npm_registry: + self.last_provenance_payload = ProvenanceFinder.find_npm_provenance(purl, self.npm_registry) + else: + logger.debug("Missing npm registry to find provenance in.") elif purl.type in ["gradle", "maven"]: - self.last_provenance_payload = ProvenanceFinder.find_gav_provenance(purl) + if self.jfrog_registry: + self.last_provenance_payload = ProvenanceFinder.find_gav_provenance(purl, self.jfrog_registry) + else: + logger.debug("Missing JFrog registry to find provenance in.") else: logger.debug("Provenance finding not supported for PURL type: %s", purl.type) self.last_provenance_payload = None @@ -55,37 +72,24 @@ def find_provenance(self, purl: PackageURL) -> InTotoPayload | None: return self.last_provenance_payload @staticmethod - def find_npm_provenance(purl: PackageURL) -> InTotoPayload | None: + def find_npm_provenance(purl: PackageURL, npm_registry: NPMRegistry) -> InTotoPayload | None: """Find and download the NPM based provenance for the passed PURL. Parameters ---------- purl: PackageURL The PURL of the analysis target. + npm_registry: NPMRegistry + The npm registry to find provenance in. Returns ------- InTotoPayload | None The provenance payload if found, or None. 
""" - # Retrieve NPM registry configuration values. - npm_section = "package_registry.npm" - if not defaults.has_section(npm_section): - logger.debug("No NPM section found in config.") - return None - if not defaults.get(npm_section, "enabled"): - logger.debug("NPM section disabled in config.") - return None - - hostname = defaults.get(npm_section, "hostname") - attestation_endpoint = defaults.get(npm_section, "attestation_endpoint") - try: - request_timeout = int(defaults.get(npm_section, "request_timeout")) - except ValueError as error: - logger.debug("Invalid value for NPM package registry timeout: %s", error) + if not npm_registry.enabled: + logger.debug("The npm registry is not enabled.") return None - # Create registry from configuration values. - npm_registry = NPMRegistry(hostname, attestation_endpoint, request_timeout) namespace = purl.namespace or "" artifact_id = purl.name @@ -127,39 +131,34 @@ def find_npm_provenance(purl: PackageURL) -> InTotoPayload | None: return None @staticmethod - def find_gav_provenance(purl: PackageURL) -> InTotoPayload | None: + def find_gav_provenance(purl: PackageURL, jfrog_registry: JFrogMavenRegistry) -> InTotoPayload | None: """Find and download the GAV based provenance for the passed PURL. Parameters ---------- purl: PackageURL The PURL of the analysis target. + jfrog_registry: JFrogMavenRegistry + The JFrog registry to find provenance in. Returns ------- InTotoPayload | None The provenance payload if found, or None. + Raises + ------ + ProvenanceAvailableException + If the discovered provenance file size exceeds the configured limit. 
""" - jfrog_section = "package_registry.jfrog.maven" - if not defaults.has_section(jfrog_section): - logger.debug("No JFrog section found in config.") + if not jfrog_registry.enabled: + logger.debug("JFrog registry not enabled.") return None - try: - request_timeout = defaults.getint(jfrog_section, "request_timeout") - download_timeout = defaults.getint(jfrog_section, "download_timeout") - except ValueError as error: - logger.debug("Failed to parse default value as int: %s", error) + if not purl.namespace or not purl.version: + logger.debug("Missing purl namespace or version for finding provenance in JFrog registry.") return None - jfrog_registry = JFrogMavenRegistry( - defaults.get(jfrog_section, "hostname"), - defaults.get(jfrog_section, "repo"), - request_timeout, - download_timeout, - ) - provenance_extensions = defaults.get_list( "slsa.verifier", "provenance_extensions", @@ -167,9 +166,9 @@ def find_gav_provenance(purl: PackageURL) -> InTotoPayload | None: ) provenance_assets = jfrog_registry.fetch_assets( - group_id=purl.namespace if purl.namespace else "", + group_id=purl.namespace, artifact_id=purl.name, - version=purl.version if purl.version else "", + version=purl.version, extensions=set(provenance_extensions), ) diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index 999ce0f87..d365f34d8 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -74,7 +74,7 @@ def find_repo(purl: PackageURL) -> str: return "" # Call Repo Finder and return first valid URL - logger.debug("Analyzing %s with Repo Finder: %s", purl.to_string(), repo_finder.__class__) + logger.debug("Analyzing %s with Repo Finder: %s", purl.to_string(), str(type(repo_finder))) return repo_finder.find_repo(purl) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 7eab59b43..599991357 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ 
b/src/macaron/slsa_analyzer/analyzer.py @@ -26,6 +26,11 @@ from macaron.output_reporter.results import Record, Report, SCMStatus from macaron.repo_finder import repo_finder from macaron.repo_finder.commit_finder import find_commit +from macaron.repo_finder.provenance_extractor import ( + ProvenanceExtractionException, + extract_repo_and_commit_from_provenance, +) +from macaron.repo_finder.provenance_finder import ProvenanceFinder from macaron.slsa_analyzer import git_url from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.asset import VirtualReleaseAsset @@ -116,7 +121,7 @@ def run( user_config: dict, sbom_path: str = "", skip_deps: bool = False, - prov_payload: InTotoPayload | None = None, + provenance_payload: InTotoPayload | None = None, ) -> int: """Run the analysis and write results to the output path. @@ -131,7 +136,7 @@ def run( The path to the SBOM. skip_deps : bool Flag to skip dependency resolution. - prov_payload : InToToPayload | None + provenance_payload : InToToPayload | None The provenance intoto payload for the main software component. Returns @@ -165,7 +170,7 @@ def run( main_record = self.run_single( main_config, analysis, - prov_payload=prov_payload, + provenance_payload=provenance_payload, ) if main_record.status != SCMStatus.AVAILABLE or not main_record.context: @@ -267,7 +272,7 @@ def run_single( config: Configuration, analysis: Analysis, existing_records: dict[str, Record] | None = None, - prov_payload: InTotoPayload | None = None, + provenance_payload: InTotoPayload | None = None, ) -> Record: """Run the checks for a single repository target. @@ -282,7 +287,7 @@ def run_single( The current analysis instance. existing_records : dict[str, Record] | None The mapping of existing records that the analysis has run successfully. - prov_payload : InToToPayload | None + provenance_payload : InToToPayload | None The provenance intoto payload for the analyzed software component. 
Returns @@ -292,8 +297,9 @@ def run_single( """ repo_id = config.get_value("id") component = None + provenance_finder = ProvenanceFinder() try: - component = self.add_component(config, analysis, existing_records) + component = self.add_component(config, analysis, provenance_finder, existing_records, provenance_payload) except PURLNotFoundError as error: logger.error(error) return Record( @@ -321,7 +327,10 @@ def run_single( analyze_ctx.dynamic_data["expectation"] = self.expectations.get_expectation_for_target( analyze_ctx.component.purl.split("@")[0] ) - analyze_ctx.dynamic_data["provenance"] = prov_payload + if not provenance_payload: + # Retrieve the provenance file from the finder. May also be None. + provenance_payload = provenance_finder.last_provenance_payload + analyze_ctx.dynamic_data["provenance"] = provenance_payload analyze_ctx.check_results = self.perform_checks(analyze_ctx) return Record( @@ -441,7 +450,12 @@ class AnalysisTarget(NamedTuple): digest: str def add_component( - self, config: Configuration, analysis: Analysis, existing_records: dict[str, Record] | None = None + self, + config: Configuration, + analysis: Analysis, + provenance_finder: ProvenanceFinder, + existing_records: dict[str, Record] | None = None, + provenance_payload: InTotoPayload | None = None, ) -> Component: """Add a software component if it does not exist in the DB already. @@ -454,8 +468,12 @@ def add_component( The configuration for running Macaron. analysis: Analysis The current analysis instance. + provenance_finder: ProvenanceFinder + The provenance finder object to use when finding provenance. existing_records : dict[str, Record] | None The mapping of existing records that the analysis has run successfully. + provenance_payload : InToToPayload | None + The provenance in-toto payload for the software component. Returns ------- @@ -472,7 +490,9 @@ def add_component( # Note: the component created in this function will be added to the database. 
available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] try: - analysis_target = Analyzer.to_analysis_target(config, available_domains) + analysis_target = Analyzer.to_analysis_target( + config, available_domains, provenance_finder, provenance_payload + ) except InvalidPURLError as error: raise PURLNotFoundError("Invalid input PURL.") from error @@ -528,7 +548,12 @@ def add_component( return Component(purl=analysis_target.parsed_purl.to_string(), analysis=analysis, repository=repository) @staticmethod - def to_analysis_target(config: Configuration, available_domains: list[str]) -> AnalysisTarget: + def to_analysis_target( + config: Configuration, + available_domains: list[str], + provenance_finder: ProvenanceFinder | None = None, + provenance_payload: InTotoPayload | None = None, + ) -> AnalysisTarget: """Resolve the details of a software component from user input. Parameters @@ -538,6 +563,10 @@ def to_analysis_target(config: Configuration, available_domains: list[str]) -> A available_domains : list[str] The list of supported git service host domain. This is used to convert repo-based PURL to a repository path of the corresponding software component. + provenance_finder: ProvenanceFinder + The provenance finder object to use when finding provenance. + provenance_payload : InToToPayload | None + The provenance in-toto payload for the software component. Returns ------- @@ -587,10 +616,29 @@ def to_analysis_target(config: Configuration, available_domains: list[str]) -> A case (_, ""): # If a PURL but no repository path is provided, we try to extract the repository path from the PURL. # Note that we can't always extract the repository path from any provided PURL. - repo = "" converted_repo_path = None + repo: str = "" + digest: str = "" # parsed_purl cannot be None here, but mypy cannot detect that without some extra help. if parsed_purl is not None: + # Try to find repository and commit via provenance. 
+ if not provenance_payload and provenance_finder: + provenance_payload = provenance_finder.find_provenance(parsed_purl) + if provenance_payload: + try: + repo, digest = extract_repo_and_commit_from_provenance(provenance_payload) + except ProvenanceExtractionException as error: + logger.debug("Failed to extract repo and commit from provenance: %s", error) + + if repo and digest: + return Analyzer.AnalysisTarget( + parsed_purl=parsed_purl, + repo_path=repo, + branch="", + digest=digest, + ) + + # The commit was not found from provenance. Proceed with Repo and Commit Finder. converted_repo_path = repo_finder.to_repo_path(parsed_purl, available_domains) if converted_repo_path is None: # Try to find repo from PURL diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index 4e10f3ca8..1833e41be 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -10,6 +10,31 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType +# The full list of cryptographic algorithms supported in in-toto v0.1 provenance. +# These are used as keys within the digest set of the resource descriptors within the subject. +# For v0.1 see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet +VALID_ALGORITHMS = [ + "sha256", + "sha224", + "sha384", + "sha512", + "sha512_224", + "sha512_256", + "sha3_224", + "sha3_256", + "sha3_384", + "sha3_512", + "shake128", + "shake256", + "blake2b", + "blake2s", + "ripemd160", + "sm3", + "gost", + "sha1", + "md5", +] + class InTotoV01Statement(TypedDict): """An in-toto version 0.1 statement. 
diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py index 8133635b4..9f1b95eb7 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py @@ -11,9 +11,9 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The full list of cryptographic algorithms supported in SLSA v1 provenance. These are used as keys within the digest -# set of the resource descriptors within the subject. -# See: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md +# The full list of cryptographic algorithms supported in in-toto v1 provenance. +# These are used as keys within the digest set of the resource descriptors within the subject. +# For v1 see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md VALID_ALGORITHMS = [ "sha256", "sha224", From ce120a76c586cd4754169a2907293b4f536e6c7e Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 12 Mar 2024 12:47:50 +1000 Subject: [PATCH 06/25] chore: use separate exception for json extract issues; remove redundant property from java repo finder; handle case where npm API returns no version; improve provenance extractor tests. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 99 ++-- .../package_registry/npm_registry.py | 43 ++ .../repo_finder/test_provenance_extractor.py | 455 ++++++++++++++++++ 3 files changed, 561 insertions(+), 36 deletions(-) create mode 100644 tests/repo_finder/test_provenance_extractor.py diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 59196ec44..81726aed4 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -38,23 +38,27 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str repo = "" commit = "" predicate_type = payload.statement.get("predicateType") - if isinstance(payload, InTotoV1Payload): - if predicate_type == "https://slsa.dev/provenance/v1": - repo, commit = _extract_from_slsa_v1(payload) - elif isinstance(payload, InTotoV01Payload): - if predicate_type == "https://slsa.dev/provenance/v0.2": - repo, commit = _extract_from_slsa_v02(payload) - if predicate_type == "https://slsa.dev/provenance/v0.1": - repo, commit = _extract_from_slsa_v01(payload) - if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1": - repo, commit = _extract_from_witness_provenance(payload) + try: + if isinstance(payload, InTotoV1Payload): + if predicate_type == "https://slsa.dev/provenance/v1": + repo, commit = _extract_from_slsa_v1(payload) + elif isinstance(payload, InTotoV01Payload): + if predicate_type == "https://slsa.dev/provenance/v0.2": + repo, commit = _extract_from_slsa_v02(payload) + if predicate_type == "https://slsa.dev/provenance/v0.1": + repo, commit = _extract_from_slsa_v01(payload) + if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1": + repo, commit = _extract_from_witness_provenance(payload) + except JsonExtractionException as error: + logger.debug(error) + raise ProvenanceExtractionException("JSON exception while 
extracting from provenance.") from error if not repo or not commit: msg = ( f"Extraction from provenance not supported for versions: " f"predicate_type {predicate_type}, in-toto {str(type(payload))}." ) - logger.error(msg) + logger.debug(msg) raise ProvenanceExtractionException(msg) logger.debug("Extracted repo and commit from provenance: %s, %s", repo, commit) @@ -69,18 +73,18 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: # The repository URL and commit are stored inside an entry in the list of predicate -> materials. # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry. - list_index = _json_extract(predicate, ["recipe", "definedInMaterial"], int) - material_list = _json_extract(predicate, ["materials"], list) + list_index = json_extract(predicate, ["recipe", "definedInMaterial"], int) + material_list = json_extract(predicate, ["materials"], list) if list_index >= len(material_list): raise ProvenanceExtractionException("Material list index outside of material list bounds.") material = material_list[list_index] if not material or not isinstance(material, dict): raise ProvenanceExtractionException("Indexed material list entry is invalid.") - uri = _json_extract(material, ["uri"], str) + uri = json_extract(material, ["uri"], str) repo = _clean_spdx(uri) - digest_set = _json_extract(material, ["digest"], dict) + digest_set = json_extract(material, ["digest"], dict) commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) return repo, commit @@ -94,10 +98,10 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: # The repository URL and commit are stored within the predicate -> invocation -> configSource object. 
# See https://slsa.dev/spec/v0.2/provenance - uri = _json_extract(predicate, ["invocation", "configSource", "uri"], str) + uri = json_extract(predicate, ["invocation", "configSource", "uri"], str) repo = _clean_spdx(uri) - digest_set = _json_extract(predicate, ["invocation", "configSource", "digest"], dict) + digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict) commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) return repo, commit @@ -109,33 +113,33 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: if not predicate: raise ProvenanceExtractionException("No predicate in payload statement.") - build_def = _json_extract(predicate, ["buildDefinition"], dict) - build_type = _json_extract(build_def, ["buildType"], str) + build_def = json_extract(predicate, ["buildDefinition"], dict) + build_type = json_extract(build_def, ["buildType"], str) # Extract the repository URL. repo = "" if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": try: - repo = _json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) - except ProvenanceExtractionException: - repo = _json_extract(build_def, ["externalParameters", "configSource", "repository"], str) + repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) + except JsonExtractionException: + repo = json_extract(build_def, ["externalParameters", "configSource", "repository"], str) if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": - repo = _json_extract(build_def, ["externalParameters", "workflow", "repository"], str) + repo = json_extract(build_def, ["externalParameters", "workflow", "repository"], str) if not repo: raise ProvenanceExtractionException("Failed to extract repository URL from provenance.") # Extract the commit hash. 
commit = "" - deps = _json_extract(build_def, ["resolvedDependencies"], list) + deps = json_extract(build_def, ["resolvedDependencies"], list) for dep in deps: if not isinstance(dep, dict): continue - uri = _json_extract(dep, ["uri"], str) + uri = json_extract(dep, ["uri"], str) url = _clean_spdx(uri) if url != repo: continue - digest_set = _json_extract(dep, ["digest"], dict) + digest_set = json_extract(dep, ["digest"], dict) commit = _extract_commit_from_digest_set(digest_set, intoto.v1.VALID_ALGORITHMS) if not commit: @@ -164,7 +168,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st if not predicate: raise ProvenanceExtractionException("No predicate in payload statement.") - attestations = _json_extract(predicate, ["attestations"], list) + attestations = json_extract(predicate, ["attestations"], list) commit = "" repo = "" for entry in attestations: @@ -174,11 +178,11 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st if not entry_type: continue if entry_type.startswith("https://witness.dev/attestations/git/"): - commit = _json_extract(entry, ["attestation", "commithash"], str) + commit = json_extract(entry, ["attestation", "commithash"], str) elif entry_type.startswith("https://witness.dev/attestations/gitlab/") or entry_type.startswith( "https://witness.dev/attestations/github/" ): - repo = _json_extract(entry, ["attestation", "projecturl"], str) + repo = json_extract(entry, ["attestation", "projecturl"], str) if not commit or not repo: raise ProvenanceExtractionException("Could not extract repo and commit from provenance.") @@ -212,25 +216,48 @@ def _clean_spdx(uri: str) -> str: return url +class JsonExtractionException(BaseException): + """When there is an error while extracting from JSON.""" + + T = TypeVar("T", bound=JsonType) -def _json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T: - """Return the value found by following the list of depth-sequential keys 
inside the passed dictionary. +def json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T: + """Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary. + + The value must be truthy, and be of the passed type. + + Parameters + ---------- + entry: dict[str, JsonType] + An entry point into the JSON structure. + keys: list[str] + The list of depth-sequential keys within the JSON. + type: type[T] + The type to check the value against and return it as. - The value's type is validated against the passed type. + Returns + ------- + T: + The found value as the type of the type parameter. + + Raises + ------ + JsonExtractionException + Raised if an error occurs while searching for or validating the value. """ target = entry for index, key in enumerate(keys): if key not in target: - raise ProvenanceExtractionException(f"JSON key not found: {key}") + raise JsonExtractionException(f"JSON key not found: {key}") next_target = target[key] if index == len(keys) - 1: - if isinstance(next_target, type_): + if next_target and isinstance(next_target, type_): return next_target else: if not isinstance(next_target, dict): - raise ProvenanceExtractionException(f"Extract value from non-dict type: {str(type(next_target))}") + raise JsonExtractionException(f"Cannot extract value from non-dict type: {str(type(next_target))}") target = next_target - raise ProvenanceExtractionException(f"Failed to find '{' > '.join(keys)}' as type '{type_}' in JSON dictionary.") + raise JsonExtractionException(f"Failed to find '{' > '.join(keys)}' as type '{type_}' in JSON dictionary.") diff --git a/src/macaron/slsa_analyzer/package_registry/npm_registry.py b/src/macaron/slsa_analyzer/package_registry/npm_registry.py index 6ceb01967..1e38486ae 100644 --- a/src/macaron/slsa_analyzer/package_registry/npm_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/npm_registry.py @@ -185,6 +185,8 @@ def download_attestation_payload(self, 
url: str, download_path: str) -> bool: logger.debug("dsseEnvelope attribute in the bundle is missing. Skipping...") continue + logger.debug("Found attestation with valid predicateType: %s", att.get("predicateType")) + try: with open(download_path, "w", encoding="utf-8") as file: json.dump(dsse_env, file) @@ -199,6 +201,47 @@ def download_attestation_payload(self, url: str, download_path: str) -> bool: return False + def get_latest_version(self, namespace: str, name: str) -> str | None: + """Try to retrieve the latest version of a package from the registry. + + Parameters + ---------- + namespace: str + The optional namespace of the package. + name: str + The name of the package. + + Returns + ------- + str | None + The latest version of the package, or None if one cannot be found. + """ + if not name: + return None + + url = f"https://{self.hostname}" + if namespace: + url = f"{url}/{namespace}" + url = f"{url}/{name}/latest" + + response = send_get_http_raw(url, timeout=self.request_timeout) + + if not response or not response.text: + logger.debug("No valid response from NPM server for latest version.") + return None + + json_data = json.loads(response.text) + try: + version = json_data["version"] + except KeyError: + version = "" + if not version: + logger.debug("No version found in response from NPM server.") + return None + + logger.debug("Found version for NPM artifact: %s", version) + return version if isinstance(version, str) else str(version) + class NPMAttestationAsset(NamedTuple): """An attestation asset hosted on the npm registry. diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py new file mode 100644 index 000000000..dc4045ce0 --- /dev/null +++ b/tests/repo_finder/test_provenance_extractor.py @@ -0,0 +1,455 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module tests the provenance extractor on valid example provenances.""" +import json + +import pytest + +from macaron.repo_finder.provenance_extractor import ( + JsonExtractionException, + ProvenanceExtractionException, + extract_repo_and_commit_from_provenance, + json_extract, +) +from macaron.slsa_analyzer.provenance.intoto import validate_intoto_payload +from macaron.util import JsonType + + +@pytest.fixture(name="slsa_v1_gcb_1_provenance") +def slsa_v1_gcb_1_provenance_() -> str: + """Return a valid SLSA v1 provenance using build type gcb and sourceToBuild.""" + return """ + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v1", + "predicate": { + "buildDefinition": { + "buildType": "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1", + "externalParameters": { + "sourceToBuild": { + "repository": "https://github.com/oracle/macaron" + } + }, + "resolvedDependencies": [ + { + "uri": "git+https://github.com/oracle/macaron@refs/heads/staging", + "digest": { "sha1": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" } + } + ] + } + } + } + """ + + +@pytest.fixture(name="slsa_v1_gcb_2_provenance") +def slsa_v1_gcb_2_provenance_() -> str: + """Return a valid SLSA v1 provenance using build type gcb and configSource.""" + return """ + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v1", + "predicate": { + "buildDefinition": { + "buildType": "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1", + "externalParameters": { + "configSource": { + "repository": "https://github.com/oracle/macaron" + } + }, + "resolvedDependencies": [ + { + "uri": "git+https://github.com/oracle/macaron@refs/heads/staging", + "digest": { + "sha1": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + } + ] + } + } + } + """ + + +@pytest.fixture(name="slsa_v1_github_provenance") +def slsa_v1_github_provenance_() -> str: + """Return a valid 
SLSA v1 provenance using build type GitHub.""" + return """ + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v1", + "predicate": { + "buildDefinition": { + "buildType": "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1", + "externalParameters": { + "workflow": { + "repository": "https://github.com/oracle/macaron" + } + }, + "resolvedDependencies": [ + { + "uri": "git+https://github.com/oracle/macaron@refs/heads/staging", + "digest": { + "gitCommit": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + }, + { + "uri": "git+https://github.com/oracle-samples/macaron@refs/heads/main" + } + ] + } + } + } + """ + + +@pytest.fixture(name="slsa_v02_provenance") +def slsa_v02_provenance_() -> str: + """Return a valid SLSA v02 provenance.""" + return """ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v0.2", + "predicate": { + "invocation": { + "configSource": { + "uri": "git+https://github.com/oracle/macaron@refs/heads/staging", + "digest": { + "sha1": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + } + } + } + } + """ + + +@pytest.fixture(name="slsa_v01_provenance") +def slsa_v01_provenance_() -> str: + """Return a valid SLSA v01 provenance.""" + return """ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v0.1", + "predicate": { + "recipe": { + "definedInMaterial": 1 + }, + "materials": [ + { + "uri": "git+https://github.com/oracle-samples/macaron@refs/heads/main" + }, + { + "uri": "git+https://github.com/oracle/macaron@refs/heads/main", + "digest": { + "sha256": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + } + ] + } + } + """ + + +@pytest.fixture(name="target_repository") +def target_repository_() -> str: + """Return the target repository URL.""" + return "https://github.com/oracle/macaron" + + +@pytest.fixture(name="target_commit") +def 
target_commit_() -> str: + """Return the target commit hash.""" + return "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + + +def test_slsa_v1_gcb_1(slsa_v1_gcb_1_provenance: str, target_repository: str, target_commit: str) -> None: + """Test SLSA v1 provenance with build type gcb and sourceToBuild.""" + payload = json.loads(slsa_v1_gcb_1_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], "") + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository key. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Add repository back. + _json_modify( + payload, + ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], + target_repository, + ) + # Re-test provenance validity. + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Remove commit. + _json_modify(payload, ["predicate", "buildDefinition", "resolvedDependencies"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def test_slsa_v1_gcb_2(slsa_v1_gcb_2_provenance: str, target_repository: str, target_commit: str) -> None: + """Test SLSA v1 provenance with build type gcb and configSource.""" + payload = json.loads(slsa_v1_gcb_2_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. 
+ _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], "") + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository key. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Re-add repository key with a bad value. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], "bad") + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def test_slsa_v1_github(slsa_v1_github_provenance: str, target_repository: str, target_commit: str) -> None: + """Test SLSA v1 provenance with build type GitHub.""" + payload = json.loads(slsa_v1_github_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], "") + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository key. + _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def test_slsa_v02(slsa_v02_provenance: str, target_repository: str, target_commit: str) -> None: + """Test SLSA v0.2 provenance.""" + payload = json.loads(slsa_v02_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. 
+ _json_modify(payload, ["predicate", "invocation", "configSource", "uri"], "") + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository key. + _json_modify(payload, ["predicate", "invocation", "configSource", "uri"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Re-add repository and re-validate. + _json_modify( + payload, ["predicate", "invocation", "configSource", "uri"], f"git+{target_repository}@refs/heads/main" + ) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Remove commit. + _json_modify(payload, ["predicate", "invocation", "configSource", "digest", "sha1"], None) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def test_slsa_v01(slsa_v01_provenance: str, target_repository: str, target_commit: str) -> None: + """Test SLSA v0.1 provenance.""" + payload = json.loads(slsa_v01_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. + materials = json_extract(payload, ["predicate", "materials"], list) + material_index = json_extract(payload, ["predicate", "recipe", "definedInMaterial"], int) + _json_modify(materials[material_index], ["uri"], "") + _json_modify(payload, ["predicate", "materials"], materials) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository. + _json_modify(materials[material_index], ["uri"], None) + _json_modify(payload, ["predicate", "materials"], materials) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Restore repository and re-validate. 
+ _json_modify(materials[material_index], ["uri"], f"git+{target_repository}@refs/heads/main") + _json_modify(payload, ["predicate", "materials"], materials) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set material index to an invalid value. + _json_modify(payload, ["predicate", "recipe", "definedInMaterial"], 10) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +@pytest.fixture(name="witness_gitlab_provenance") +def witness_gitlab_provenance_() -> str: + """Return a Witness v0.1 provenance with a GitLab attestation.""" + return """ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://witness.testifysec.com/attestation-collection/v0.1", + "predicate": { + "name": "test", + "attestations": [ + { + "type": "https://witness.dev/attestations/gitlab/v0.1", + "attestation": { + "projecturl": "https://github.com/oracle/macaron" + } + }, + { + "type": "https://witness.dev/attestations/git/v0.1", + "attestation": { + "commithash": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + } + ] + } + } + """ + + +@pytest.fixture(name="witness_github_provenance") +def witness_github_provenance_() -> str: + """Return a Witness v0.1 provenance with a GitHub attestation.""" + return """ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://witness.testifysec.com/attestation-collection/v0.1", + "predicate": { + "name": "test", + "attestations": [ + { + "type": "https://witness.dev/attestations/github/v0.1", + "attestation": { + "projecturl": "https://github.com/oracle/macaron" + } + }, + { + "type": "https://witness.dev/attestations/git/v0.1", + "attestation": { + "commithash": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + } + } + ] + } + } + """ + + +def test_witness_gitlab(witness_gitlab_provenance: str, target_repository: str, target_commit: str) -> None: + """Test Witness v01 GitLab provenance.""" + 
payload = json.loads(witness_gitlab_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set repository to an empty string. + attestations = json_extract(payload, ["predicate", "attestations"], list) + _json_modify(attestations[0], ["attestation", "projecturl"], "") + _json_modify(payload, ["attestation"], attestations) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove repository. + _json_modify(attestations[0], ["attestation", "projecturl"], None) + _json_modify(payload, ["attestation"], attestations) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Restore repository and re-validate. + _json_modify(attestations[0], ["attestation", "projecturl"], target_repository) + _json_modify(payload, ["attestation"], attestations) + _perform_provenance_comparison(payload, target_repository, target_commit) + + # Set commit to an empty string. + _json_modify(attestations[1], ["attestation", "commithash"], "") + _json_modify(payload, ["attestation"], attestations) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + # Remove the Git attestation. 
+ _json_modify(payload, ["attestation"], attestations[:1]) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def test_witness_github(witness_github_provenance: str, target_repository: str, target_commit: str) -> None: + """Test Witness v01 GitHub provenance.""" + payload = json.loads(witness_github_provenance) + assert isinstance(payload, dict) + _perform_provenance_comparison(payload, target_repository, target_commit) + + +@pytest.mark.parametrize( + ("type_", "predicate_type"), + [ + ("https://in-toto.io/Statement/v0.1", "https://slsa.dev/provenance/v1"), + ("https://in-toto.io/Statement/v1", "https://slsa.dev/provenance/v0.2"), + ("https://in-toto.io/Statement/v1", "https://slsa.dev/provenance/v0.1"), + ("https://in-toto.io/Statement/v1", "https://witness.testifysec.com/attestation-collection/v0.1"), + ], +) +def test_invalid_type_payloads(type_: str, predicate_type: str) -> None: + """Test payloads with invalid type combinations.""" + payload_text = '{ "_type": ' + f'"{type_}",' + ' "predicateType": ' + f'"{predicate_type}",' + payload_text = f"{payload_text}" + '"subject": [], "predicate": {} }' + payload = json.loads(payload_text) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(payload, "", "") + + +def _perform_provenance_comparison(payload: JsonType, expected_repo: str, expected_commit: str) -> None: + """Accept a provenance and extraction function, assert the extracted values match the expected ones.""" + assert isinstance(payload, dict) + provenance = validate_intoto_payload(payload) + repo, commit = extract_repo_and_commit_from_provenance(provenance) + assert expected_repo == repo + assert expected_commit == commit + + +def _json_modify(entry: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: + """Modify the value found by following the list of depth-sequential keys inside the passed JSON dictionary. 
+ + The found value will be overwritten by the new_value parameter. + If new_value is None, the value will be removed. + If the final key does not exist, it will be created as new_value. + """ + target = entry + for index, key in enumerate(keys): + if key not in target: + if index == len(keys) - 1: + # Add key. + target[key] = new_value + return + raise JsonExtractionException(f"JSON key not found: {key}") + next_target = target[key] + if index == len(keys) - 1: + if new_value is None: + # Remove value. + del target[key] + else: + # Replace value + target[key] = new_value + else: + if not isinstance(next_target, dict): + raise JsonExtractionException(f"Cannot extract value from non-dict type: {str(type(next_target))}") + target = next_target From 1d1a085c0f281502acee94445a4dc08f4bf26cf8 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 13 Mar 2024 12:49:51 +1000 Subject: [PATCH 07/25] chore: refactor stateful provenance finder. Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/provenance_finder.py | 16 +-- src/macaron/slsa_analyzer/analyzer.py | 128 +++++++++++-------- tests/repo_finder/test_repo_finder.py | 3 +- tests/slsa_analyzer/test_analyzer.py | 10 +- 4 files changed, 91 insertions(+), 66 deletions(-) diff --git a/src/macaron/repo_finder/provenance_finder.py b/src/macaron/repo_finder/provenance_finder.py index 957092593..fc4df1126 100644 --- a/src/macaron/repo_finder/provenance_finder.py +++ b/src/macaron/repo_finder/provenance_finder.py @@ -25,7 +25,6 @@ class ProvenanceFinder: """This class is used to find and retrieve provenance files from supported registries.""" def __init__(self) -> None: - self.last_provenance_payload: InTotoPayload | None = None registries = PACKAGE_REGISTRIES self.npm_registry: NPMRegistry | None = None self.jfrog_registry: JFrogMavenRegistry | None = None @@ -53,23 +52,18 @@ def find_provenance(self, purl: PackageURL) -> InTotoPayload | None: # Do not perform this function for repository type targets. 
return None - self.last_provenance_payload = None - if purl.type == "npm": if self.npm_registry: - self.last_provenance_payload = ProvenanceFinder.find_npm_provenance(purl, self.npm_registry) - else: - logger.debug("Missing npm registry to find provenance in.") + return ProvenanceFinder.find_npm_provenance(purl, self.npm_registry) + logger.debug("Missing npm registry to find provenance in.") elif purl.type in ["gradle", "maven"]: if self.jfrog_registry: - self.last_provenance_payload = ProvenanceFinder.find_gav_provenance(purl, self.jfrog_registry) - else: - logger.debug("Missing JFrog registry to find provenance in.") + return ProvenanceFinder.find_gav_provenance(purl, self.jfrog_registry) + logger.debug("Missing JFrog registry to find provenance in.") else: logger.debug("Provenance finding not supported for PURL type: %s", purl.type) - self.last_provenance_payload = None - return self.last_provenance_payload + return None @staticmethod def find_npm_provenance(purl: PackageURL, npm_registry: NPMRegistry) -> InTotoPayload | None: diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 599991357..04b2c48a5 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -295,11 +295,44 @@ def run_single( Record The record of the analysis for this repository. """ + # Parse the PURL. repo_id = config.get_value("id") + try: + parsed_purl = Analyzer.parse_purl(config) + except InvalidPURLError as error: + logger.error(error) + return Record( + record_id=repo_id, + description=str(error), + pre_config=config, + status=SCMStatus.ANALYSIS_FAILED, + ) + + if not provenance_payload and parsed_purl and not config.get_value("path"): + # Try to find the provenance file for the parsed PURL. + provenance_payload = ProvenanceFinder().find_provenance(parsed_purl) + + # Create the analysis target. 
+ msg = "" + available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] + try: + analysis_target = Analyzer.to_analysis_target(config, available_domains, parsed_purl, provenance_payload) + except InvalidPURLError as error: + logger.debug("Invalid input PURL: %s", error) + msg = "Invalid input PURL." + analysis_target = None + + if not analysis_target or (not analysis_target.parsed_purl and not analysis_target.repo_path): + return Record( + record_id=repo_id, + description=msg or "Cannot determine the analysis as PURL and/or repository path is not provided.", + pre_config=config, + status=SCMStatus.ANALYSIS_FAILED, + ) + component = None - provenance_finder = ProvenanceFinder() try: - component = self.add_component(config, analysis, provenance_finder, existing_records, provenance_payload) + component = self.add_component(analysis, analysis_target, existing_records) except PURLNotFoundError as error: logger.error(error) return Record( @@ -327,9 +360,6 @@ def run_single( analyze_ctx.dynamic_data["expectation"] = self.expectations.get_expectation_for_target( analyze_ctx.component.purl.split("@")[0] ) - if not provenance_payload: - # Retrieve the provenance file from the finder. May also be None. - provenance_payload = provenance_finder.last_provenance_payload analyze_ctx.dynamic_data["provenance"] = provenance_payload analyze_ctx.check_results = self.perform_checks(analyze_ctx) @@ -451,11 +481,9 @@ class AnalysisTarget(NamedTuple): def add_component( self, - config: Configuration, analysis: Analysis, - provenance_finder: ProvenanceFinder, + analysis_target: AnalysisTarget, existing_records: dict[str, Record] | None = None, - provenance_payload: InTotoPayload | None = None, ) -> Component: """Add a software component if it does not exist in the DB already. @@ -464,16 +492,12 @@ def add_component( Parameters ---------- - config: Configuration - The configuration for running Macaron. analysis: Analysis The current analysis instance. 
- provenance_finder: ProvenanceFinder - The provenance finder object to use when finding provenance. + analysis_target: AnalysisTarget + The target of this analysis. existing_records : dict[str, Record] | None The mapping of existing records that the analysis has run successfully. - provenance_payload : InToToPayload | None - The provenance in-toto payload for the software component. Returns ------- @@ -488,17 +512,6 @@ def add_component( The component is already analyzed in the same session. """ # Note: the component created in this function will be added to the database. - available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] - try: - analysis_target = Analyzer.to_analysis_target( - config, available_domains, provenance_finder, provenance_payload - ) - except InvalidPURLError as error: - raise PURLNotFoundError("Invalid input PURL.") from error - - if not analysis_target.parsed_purl and not analysis_target.repo_path: - raise PURLNotFoundError("Cannot determine the analysis as PURL and/or repository path is not provided.") - repository = None if analysis_target.repo_path: git_obj = self._prepare_repo( @@ -547,11 +560,47 @@ def add_component( # available or not. return Component(purl=analysis_target.parsed_purl.to_string(), analysis=analysis, repository=repository) + @staticmethod + def parse_purl(config: Configuration) -> PackageURL | None: + """Parse the PURL provided in the input. + + Parameters + ---------- + config : Configuration + The target configuration that stores the user input values for the software component. + + Returns + ------- + PackageURL | None + The parsed PURL, or None if one was not provided as input. + + Raises + ------ + InvalidPURLError + If the PURL provided from the user is invalid. + """ + # Due to the current design of Configuration class, repo_path, branch and digest are initialized + # as empty strings, and we assumed that they are always set with input values as non-empty strings. 
+ # Therefore, their true types are ``str``, and an empty string indicates that the input value is not provided. + # The purl might be a PackageURL type, a string, or None, which should be reduced down to an optional + # PackageURL type. + if config.get_value("purl") is None or config.get_value("purl") == "": + return None + purl = config.get_value("purl") + if isinstance(purl, PackageURL): + return purl + try: + # Note that PackageURL.from_string sanitizes the unsafe characters in the purl string, + # which is user-controllable, by calling urllib's `urlsplit` function. + return PackageURL.from_string(purl) + except ValueError as error: + raise InvalidPURLError(f"Invalid input PURL: {purl}") from error + @staticmethod def to_analysis_target( config: Configuration, available_domains: list[str], - provenance_finder: ProvenanceFinder | None = None, + parsed_purl: PackageURL | None, provenance_payload: InTotoPayload | None = None, ) -> AnalysisTarget: """Resolve the details of a software component from user input. @@ -563,8 +612,8 @@ def to_analysis_target( available_domains : list[str] The list of supported git service host domain. This is used to convert repo-based PURL to a repository path of the corresponding software component. - provenance_finder: ProvenanceFinder - The provenance finder object to use when finding provenance. + parsed_purl: PackageURL | None + The PURL to use for the analysis target, or None if one has not been provided. provenance_payload : InToToPayload | None The provenance in-toto payload for the software component. @@ -578,24 +627,6 @@ def to_analysis_target( InvalidPURLError If the PURL provided from the user is invalid. """ - # Due to the current design of Configuration class, repo_path, branch and digest are initialized - # as empty strings, and we assumed that they are always set with input values as non-empty strings. - # Therefore, their true types are ``str``, and an empty string indicates that the input value is not provided. 
- # The purl might be a PackageURL type, a string, or None, which should be reduced down to an optional - # PackageURL type. - parsed_purl: PackageURL | None - if config.get_value("purl") is None or config.get_value("purl") == "": - parsed_purl = None - elif isinstance(config.get_value("purl"), PackageURL): - parsed_purl = config.get_value("purl") - else: - try: - # Note that PackageURL.from_string sanitizes the unsafe characters in the purl string, - # which is user-controllable, by calling urllib's `urlsplit` function. - parsed_purl = PackageURL.from_string(config.get_value("purl")) - except ValueError as error: - raise InvalidPURLError(f"Invalid input PURL: {config.get_value('purl')}") from error - repo_path_input: str = config.get_value("path") input_branch: str = config.get_value("branch") input_digest: str = config.get_value("digest") @@ -621,10 +652,8 @@ def to_analysis_target( digest: str = "" # parsed_purl cannot be None here, but mypy cannot detect that without some extra help. if parsed_purl is not None: - # Try to find repository and commit via provenance. - if not provenance_payload and provenance_finder: - provenance_payload = provenance_finder.find_provenance(parsed_purl) if provenance_payload: + # Try to find repository and commit via provenance. try: repo, digest = extract_repo_and_commit_from_provenance(provenance_payload) except ProvenanceExtractionException as error: @@ -721,7 +750,6 @@ def _prepare_repo( The pydriller.Git object of the repository or None if error. """ # TODO: separate the logic for handling remote and local repos instead of putting them into this method. 
- logger.info( "Preparing the repository for the analysis (path=%s, branch=%s, digest=%s)", repo_path, diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py index 6b724d2e2..03b86c4d5 100644 --- a/tests/repo_finder/test_repo_finder.py +++ b/tests/repo_finder/test_repo_finder.py @@ -72,7 +72,8 @@ def test_resolve_analysis_target( config: Configuration, available_domains: list[str], expect: Analyzer.AnalysisTarget ) -> None: """Test the resolve analysis target method with valid inputs.""" - assert Analyzer.to_analysis_target(config, available_domains) == expect + parsed_purl = Analyzer.parse_purl(config) + assert Analyzer.to_analysis_target(config, available_domains, parsed_purl) == expect @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/test_analyzer.py b/tests/slsa_analyzer/test_analyzer.py index d82d6676d..3d305590e 100644 --- a/tests/slsa_analyzer/test_analyzer.py +++ b/tests/slsa_analyzer/test_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests the slsa_analyzer.Gh module.""" @@ -103,7 +103,8 @@ def test_resolve_analysis_target( config: Configuration, available_domains: list[str], expect: Analyzer.AnalysisTarget ) -> None: """Test the resolve analysis target method with valid inputs.""" - assert Analyzer.to_analysis_target(config, available_domains) == expect + parsed_purl = Analyzer.parse_purl(config) + assert Analyzer.to_analysis_target(config, available_domains, parsed_purl) == expect @given( @@ -136,7 +137,8 @@ def test_invalid_analysis_target( } ) try: - Analyzer.to_analysis_target(config, available_domains) + purl = Analyzer.parse_purl(config) + Analyzer.to_analysis_target(config, available_domains, purl) except InvalidPURLError: pass @@ -151,4 +153,4 @@ def test_invalid_analysis_target( def test_resolve_analysis_target_invalid_purl(config: Configuration) -> None: """Test the resolve analysis target method with invalid inputs.""" with pytest.raises(InvalidPURLError): - Analyzer.to_analysis_target(config, []) + Analyzer.parse_purl(config) From bded451d0eaf7ae8ea95e98d78bc29843f27a32b Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 13 Mar 2024 13:00:09 +1000 Subject: [PATCH 08/25] chore: use GitLab URL in GitLab provenance test. 
Signed-off-by: Ben Selwyn-Smith --- tests/repo_finder/test_provenance_extractor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index dc4045ce0..44c6d2e84 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -312,13 +312,13 @@ def witness_gitlab_provenance_() -> str: { "type": "https://witness.dev/attestations/gitlab/v0.1", "attestation": { - "projecturl": "https://github.com/oracle/macaron" + "projecturl": "https://gitlab.com/tinyMediaManager/tinyMediaManager" } }, { "type": "https://witness.dev/attestations/git/v0.1", "attestation": { - "commithash": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + "commithash": "cf6080a92d1c748ba5f05ea16529e05e5c641a49" } } ] @@ -356,8 +356,10 @@ def witness_github_provenance_() -> str: """ -def test_witness_gitlab(witness_gitlab_provenance: str, target_repository: str, target_commit: str) -> None: +def test_witness_gitlab(witness_gitlab_provenance: str) -> None: """Test Witness v01 GitLab provenance.""" + target_repository = "https://gitlab.com/tinyMediaManager/tinyMediaManager" + target_commit = "cf6080a92d1c748ba5f05ea16529e05e5c641a49" payload = json.loads(witness_gitlab_provenance) assert isinstance(payload, dict) _perform_provenance_comparison(payload, target_repository, target_commit) From 50c06077008b3532f6d575fff7bc120ef9a7b56c Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 13 Mar 2024 19:52:09 +1000 Subject: [PATCH 09/25] chore: further refactor analysis target callsite and functionality; refactor provenance extractor tests; assume one provenance per GAV in provenance finder; make npm registry namespace consistent. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 2 +- src/macaron/repo_finder/provenance_finder.py | 4 +- src/macaron/slsa_analyzer/analyzer.py | 36 +- .../package_registry/npm_registry.py | 9 +- .../repo_finder/test_provenance_extractor.py | 393 +++++++++--------- tests/slsa_analyzer/test_analyzer.py | 8 +- 6 files changed, 241 insertions(+), 211 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 81726aed4..409f45538 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -216,7 +216,7 @@ def _clean_spdx(uri: str) -> str: return url -class JsonExtractionException(BaseException): +class JsonExtractionException(MacaronError): """When there is an error while extracting from JSON.""" diff --git a/src/macaron/repo_finder/provenance_finder.py b/src/macaron/repo_finder/provenance_finder.py index fc4df1126..06018a13a 100644 --- a/src/macaron/repo_finder/provenance_finder.py +++ b/src/macaron/repo_finder/provenance_finder.py @@ -85,7 +85,7 @@ def find_npm_provenance(purl: PackageURL, npm_registry: NPMRegistry) -> InTotoPa logger.debug("The npm registry is not enabled.") return None - namespace = purl.namespace or "" + namespace = purl.namespace artifact_id = purl.name version = purl.version @@ -219,7 +219,7 @@ def find_gav_provenance(purl: PackageURL, jfrog_registry: JFrogMavenRegistry) -> logger.debug("No payloads found in provenance files.") return None - # TODO decide what to do when multiple provenance payloads are present. + # We assume that there is only one provenance per GAV. 
provenance = provenances[0] return provenance diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 04b2c48a5..00f0aca91 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -21,7 +21,14 @@ from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session from macaron.database.table_definitions import Analysis, Component, Repository from macaron.dependency_analyzer import DependencyAnalyzer, DependencyInfo -from macaron.errors import CloneError, DuplicateError, InvalidPURLError, PURLNotFoundError, RepoCheckOutError +from macaron.errors import ( + CloneError, + DuplicateError, + InvalidPURLError, + MacaronError, + PURLNotFoundError, + RepoCheckOutError, +) from macaron.output_reporter.reporter import FileReporter from macaron.output_reporter.results import Record, Report, SCMStatus from macaron.repo_finder import repo_finder @@ -313,23 +320,18 @@ def run_single( provenance_payload = ProvenanceFinder().find_provenance(parsed_purl) # Create the analysis target. - msg = "" available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] try: analysis_target = Analyzer.to_analysis_target(config, available_domains, parsed_purl, provenance_payload) - except InvalidPURLError as error: - logger.debug("Invalid input PURL: %s", error) - msg = "Invalid input PURL." - analysis_target = None - - if not analysis_target or (not analysis_target.parsed_purl and not analysis_target.repo_path): + except InvalidAnalysisTargetError as error: return Record( record_id=repo_id, - description=msg or "Cannot determine the analysis as PURL and/or repository path is not provided.", + description=str(error), pre_config=config, status=SCMStatus.ANALYSIS_FAILED, ) + # Create the component. 
component = None try: component = self.add_component(analysis, analysis_target, existing_records) @@ -624,8 +626,8 @@ def to_analysis_target( Raises ------ - InvalidPURLError - If the PURL provided from the user is invalid. + InvalidAnalysisTargetError + Raised if a valid Analysis Target cannot be created. """ repo_path_input: str = config.get_value("path") input_branch: str = config.get_value("branch") @@ -633,7 +635,9 @@ def to_analysis_target( match (parsed_purl, repo_path_input): case (None, ""): - return Analyzer.AnalysisTarget(parsed_purl=None, repo_path="", branch="", digest="") + raise InvalidAnalysisTargetError( + "Cannot determine the analysis target: PURL and repository path are missing." + ) case (None, _): # If only the repository path is provided, we will use the user-provided repository path to create the @@ -689,7 +693,9 @@ def to_analysis_target( ) case _: - return Analyzer.AnalysisTarget(parsed_purl=None, repo_path="", branch="", digest="") + raise InvalidAnalysisTargetError( + "Cannot determine the analysis target: PURL and repository path are missing." + ) def get_analyze_ctx(self, component: Component) -> AnalyzeContext: """Return the analyze context for a target component. 
@@ -996,3 +1002,7 @@ def __init__(self, *args: Any, context: AnalyzeContext | None = None, **kwargs: """ super().__init__(*args, **kwargs) self.context: AnalyzeContext | None = context + + +class InvalidAnalysisTargetError(MacaronError): + """When a valid Analysis Target cannot be constructed.""" diff --git a/src/macaron/slsa_analyzer/package_registry/npm_registry.py b/src/macaron/slsa_analyzer/package_registry/npm_registry.py index 1e38486ae..e62185023 100644 --- a/src/macaron/slsa_analyzer/package_registry/npm_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/npm_registry.py @@ -201,12 +201,12 @@ def download_attestation_payload(self, url: str, download_path: str) -> bool: return False - def get_latest_version(self, namespace: str, name: str) -> str | None: + def get_latest_version(self, namespace: str | None, name: str) -> str | None: """Try to retrieve the latest version of a package from the registry. Parameters ---------- - namespace: str + namespace: str | None The optional namespace of the package. name: str The name of the package. 
@@ -231,10 +231,7 @@ def get_latest_version(self, namespace: str, name: str) -> str | None: return None json_data = json.loads(response.text) - try: - version = json_data["version"] - except KeyError: - version = "" + version = json_data.get("version") if not version: logger.debug("No version found in response from NPM server.") return None diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index 44c6d2e84..e8efffd49 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -17,9 +17,10 @@ @pytest.fixture(name="slsa_v1_gcb_1_provenance") -def slsa_v1_gcb_1_provenance_() -> str: +def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and sourceToBuild.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -42,12 +43,14 @@ def slsa_v1_gcb_1_provenance_() -> str: } } """ + ) @pytest.fixture(name="slsa_v1_gcb_2_provenance") -def slsa_v1_gcb_2_provenance_() -> str: +def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and configSource.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -72,12 +75,14 @@ def slsa_v1_gcb_2_provenance_() -> str: } } """ + ) @pytest.fixture(name="slsa_v1_github_provenance") -def slsa_v1_github_provenance_() -> str: +def slsa_v1_github_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type GitHub.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v1", "subject": [], @@ -105,12 +110,14 @@ def slsa_v1_github_provenance_() -> str: } } """ + ) @pytest.fixture(name="slsa_v02_provenance") -def slsa_v02_provenance_() -> str: +def slsa_v02_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v02 
provenance.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -127,12 +134,14 @@ def slsa_v02_provenance_() -> str: } } """ + ) @pytest.fixture(name="slsa_v01_provenance") -def slsa_v01_provenance_() -> str: +def slsa_v01_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v01 provenance.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -155,153 +164,14 @@ def slsa_v01_provenance_() -> str: } } """ - - -@pytest.fixture(name="target_repository") -def target_repository_() -> str: - """Return the target repository URL.""" - return "https://github.com/oracle/macaron" - - -@pytest.fixture(name="target_commit") -def target_commit_() -> str: - """Return the target commit hash.""" - return "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" - - -def test_slsa_v1_gcb_1(slsa_v1_gcb_1_provenance: str, target_repository: str, target_commit: str) -> None: - """Test SLSA v1 provenance with build type gcb and sourceToBuild.""" - payload = json.loads(slsa_v1_gcb_1_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set repository to an empty string. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], "") - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Remove repository key. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Add repository back. - _json_modify( - payload, - ["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], - target_repository, - ) - # Re-test provenance validity. 
- _perform_provenance_comparison(payload, target_repository, target_commit) - - # Remove commit. - _json_modify(payload, ["predicate", "buildDefinition", "resolvedDependencies"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - -def test_slsa_v1_gcb_2(slsa_v1_gcb_2_provenance: str, target_repository: str, target_commit: str) -> None: - """Test SLSA v1 provenance with build type gcb and configSource.""" - payload = json.loads(slsa_v1_gcb_2_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set repository to an empty string. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], "") - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Remove repository key. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Re-add repository key with a bad value. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], "bad") - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - -def test_slsa_v1_github(slsa_v1_github_provenance: str, target_repository: str, target_commit: str) -> None: - """Test SLSA v1 provenance with build type GitHub.""" - payload = json.loads(slsa_v1_github_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set repository to an empty string. 
- _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], "") - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Remove repository key. - _json_modify(payload, ["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - -def test_slsa_v02(slsa_v02_provenance: str, target_repository: str, target_commit: str) -> None: - """Test SLSA v0.2 provenance.""" - payload = json.loads(slsa_v02_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set repository to an empty string. - _json_modify(payload, ["predicate", "invocation", "configSource", "uri"], "") - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Remove repository key. - _json_modify(payload, ["predicate", "invocation", "configSource", "uri"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Re-add repository and re-validate. - _json_modify( - payload, ["predicate", "invocation", "configSource", "uri"], f"git+{target_repository}@refs/heads/main" ) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Remove commit. - _json_modify(payload, ["predicate", "invocation", "configSource", "digest", "sha1"], None) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - -def test_slsa_v01(slsa_v01_provenance: str, target_repository: str, target_commit: str) -> None: - """Test SLSA v0.1 provenance.""" - payload = json.loads(slsa_v01_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set repository to an empty string. 
- materials = json_extract(payload, ["predicate", "materials"], list) - material_index = json_extract(payload, ["predicate", "recipe", "definedInMaterial"], int) - _json_modify(materials[material_index], ["uri"], "") - _json_modify(payload, ["predicate", "materials"], materials) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Remove repository. - _json_modify(materials[material_index], ["uri"], None) - _json_modify(payload, ["predicate", "materials"], materials) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") - - # Restore repository and re-validate. - _json_modify(materials[material_index], ["uri"], f"git+{target_repository}@refs/heads/main") - _json_modify(payload, ["predicate", "materials"], materials) - _perform_provenance_comparison(payload, target_repository, target_commit) - - # Set material index to an invalid value. - _json_modify(payload, ["predicate", "recipe", "definedInMaterial"], 10) - with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") @pytest.fixture(name="witness_gitlab_provenance") -def witness_gitlab_provenance_() -> str: +def witness_gitlab_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitLab attestation.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -325,12 +195,14 @@ def witness_gitlab_provenance_() -> str: } } """ + ) @pytest.fixture(name="witness_github_provenance") -def witness_github_provenance_() -> str: +def witness_github_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitHub attestation.""" - return """ + return _load_and_validate_josn( + """ { "_type": "https://in-toto.io/Statement/v0.1", "subject": [], @@ -354,51 +226,190 @@ def witness_github_provenance_() -> str: } } """ + ) + + +@pytest.fixture(name="target_repository") +def 
target_repository_() -> str: + """Return the target repository URL.""" + return "https://github.com/oracle/macaron" -def test_witness_gitlab(witness_gitlab_provenance: str) -> None: - """Test Witness v01 GitLab provenance.""" - target_repository = "https://gitlab.com/tinyMediaManager/tinyMediaManager" - target_commit = "cf6080a92d1c748ba5f05ea16529e05e5c641a49" - payload = json.loads(witness_gitlab_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) +@pytest.fixture(name="target_commit") +def target_commit_() -> str: + """Return the target commit hash.""" + return "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + - # Set repository to an empty string. - attestations = json_extract(payload, ["predicate", "attestations"], list) - _json_modify(attestations[0], ["attestation", "projecturl"], "") - _json_modify(payload, ["attestation"], attestations) +def test_slsa_v1_gcb_1_is_valid( + slsa_v1_gcb_1_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test valid SLSA v1 provenance with build type gcb and sourceToBuild.""" + _perform_provenance_comparison(slsa_v1_gcb_1_provenance, target_repository, target_commit) + + +@pytest.mark.parametrize( + ("keys", "new_value"), + [ + (["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], ""), + (["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], None), + (["predicate", "buildDefinition", "externalParameters", "sourceToBuild", "repository"], "bad_url"), + (["predicate", "buildDefinition", "resolvedDependencies"], ""), + (["predicate", "buildDefinition", "resolvedDependencies"], None), + ], +) +def test_slsa_v1_gcb_1_is_invalid( + slsa_v1_gcb_1_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType +) -> None: + """Test invalidly modified SLSA v1 provenance with build type gcb and sourceToBuild.""" + _json_modify(slsa_v1_gcb_1_provenance, keys, 
new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") + _perform_provenance_comparison(slsa_v1_gcb_1_provenance, "", "") + + +def test_slsa_v1_gcb_2_is_valid( + slsa_v1_gcb_2_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test valid SLSA v1 provenance with build type gcb and configSource.""" + _perform_provenance_comparison(slsa_v1_gcb_2_provenance, target_repository, target_commit) - # Remove repository. - _json_modify(attestations[0], ["attestation", "projecturl"], None) - _json_modify(payload, ["attestation"], attestations) + +@pytest.mark.parametrize( + ("keys", "new_value"), + [ + (["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], ""), + (["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], None), + (["predicate", "buildDefinition", "externalParameters", "configSource", "repository"], "bad_url"), + ], +) +def test_slsa_v1_gcb_2_is_invalid( + slsa_v1_gcb_2_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType +) -> None: + """Test invalidly modified SLSA v1 provenance with build type gcb and configSource.""" + _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") + _perform_provenance_comparison(slsa_v1_gcb_2_provenance, "", "") + - # Restore repository and re-validate. 
- _json_modify(attestations[0], ["attestation", "projecturl"], target_repository) - _json_modify(payload, ["attestation"], attestations) - _perform_provenance_comparison(payload, target_repository, target_commit) +def test_slsa_v1_github_is_valid( + slsa_v1_github_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test valid SLSA v1 provenance with build type GitHub.""" + _perform_provenance_comparison(slsa_v1_github_provenance, target_repository, target_commit) - # Set commit to an empty string. - _json_modify(attestations[1], ["attestation", "commithash"], "") - _json_modify(payload, ["attestation"], attestations) + +@pytest.mark.parametrize( + ("keys", "new_value"), + [ + (["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], ""), + (["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], None), + (["predicate", "buildDefinition", "externalParameters", "workflow", "repository"], "bad_url"), + ], +) +def test_slsa_v1_github_is_invalid( + slsa_v1_github_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType +) -> None: + """Test invalidly modified SLSA v1 provenance with build type GitHub.""" + _json_modify(slsa_v1_github_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") + _perform_provenance_comparison(slsa_v1_github_provenance, "", "") - # Remove the Git attestation. 
- _json_modify(payload, ["attestation"], attestations[:1]) + +def test_slsa_v02_is_valid( + slsa_v02_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test SLSA v0.2 provenance.""" + _perform_provenance_comparison(slsa_v02_provenance, target_repository, target_commit) + + +@pytest.mark.parametrize( + ("keys", "new_value"), + [ + (["predicate", "invocation", "configSource", "uri"], ""), + (["predicate", "invocation", "configSource", "uri"], None), + (["predicate", "invocation", "configSource", "uri"], "bad_url"), + (["predicate", "invocation", "configSource", "digest", "sha1"], ""), + (["predicate", "invocation", "configSource", "digest", "sha1"], None), + ], +) +def test_slsa_v02_is_invalid(slsa_v02_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: + """Test invalidly modified SLSA v0.2 provenance.""" + _json_modify(slsa_v02_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") + _perform_provenance_comparison(slsa_v02_provenance, "", "") + + +def test_slsa_v01_is_valid( + slsa_v01_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test valid SLSA v0.1 provenance.""" + _perform_provenance_comparison(slsa_v01_provenance, target_repository, target_commit) + + +@pytest.mark.parametrize( + "new_value", + [ + "", + None, + ], +) +def test_slsa_v01_is_invalid(slsa_v01_provenance: dict[str, JsonType], new_value: JsonType) -> None: + """Test invalidly modified SLSA v0.1 provenance.""" + materials = json_extract(slsa_v01_provenance, ["predicate", "materials"], list) + material_index = json_extract(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], int) + _json_modify(materials[material_index], ["uri"], new_value) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(slsa_v01_provenance, "", "") -def 
test_witness_github(witness_github_provenance: str, target_repository: str, target_commit: str) -> None: - """Test Witness v01 GitHub provenance.""" - payload = json.loads(witness_github_provenance) - assert isinstance(payload, dict) - _perform_provenance_comparison(payload, target_repository, target_commit) +def test_slsa_v01_invalid_material_index(slsa_v01_provenance: dict[str, JsonType]) -> None: + """Test the SLSA v0.1 provenance with an invalid materials index.""" + _json_modify(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], 10) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(slsa_v01_provenance, "", "") + + +def test_witness_gitlab_is_valid(witness_gitlab_provenance: dict[str, JsonType]) -> None: + """Test valid Witness v0.1 GitLab provenance.""" + _perform_provenance_comparison( + witness_gitlab_provenance, + "https://gitlab.com/tinyMediaManager/tinyMediaManager", + "cf6080a92d1c748ba5f05ea16529e05e5c641a49", + ) + + +def test_witness_github_is_valid( + witness_github_provenance: dict[str, JsonType], target_repository: str, target_commit: str +) -> None: + """Test valid Witness v0.1 GitHub provenance.""" + _perform_provenance_comparison(witness_github_provenance, target_repository, target_commit) + + +@pytest.mark.parametrize( + ("keys", "new_value", "attestation_index"), + [ + (["attestation", "projecturl"], "", 0), + (["attestation", "projecturl"], None, 0), + (["attestation", "commithash"], "", 1), + (["attestation", "commithash"], None, 1), + ], +) +def test_witness_github_is_invalid( + witness_github_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType, attestation_index: int +) -> None: + """Test invalidly modified Witness v0.1 GitHub provenance.""" + attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) + _json_modify(attestations[attestation_index], keys, new_value) + with pytest.raises(ProvenanceExtractionException): + 
_perform_provenance_comparison(witness_github_provenance, "", "") + + +def test_witness_github_remove_attestation(witness_github_provenance: dict[str, JsonType]) -> None: + """Test removing Git attestation from Witness V0.1 GitHub provenance.""" + attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) + _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) + with pytest.raises(ProvenanceExtractionException): + _perform_provenance_comparison(witness_github_provenance, "", "") @pytest.mark.parametrize( @@ -419,9 +430,8 @@ def test_invalid_type_payloads(type_: str, predicate_type: str) -> None: _perform_provenance_comparison(payload, "", "") -def _perform_provenance_comparison(payload: JsonType, expected_repo: str, expected_commit: str) -> None: +def _perform_provenance_comparison(payload: dict[str, JsonType], expected_repo: str, expected_commit: str) -> None: """Accept a provenance and extraction function, assert the extracted values match the expected ones.""" - assert isinstance(payload, dict) provenance = validate_intoto_payload(payload) repo, commit = extract_repo_and_commit_from_provenance(provenance) assert expected_repo == repo @@ -455,3 +465,10 @@ def _json_modify(entry: dict[str, JsonType], keys: list[str], new_value: JsonTyp if not isinstance(next_target, dict): raise JsonExtractionException(f"Cannot extract value from non-dict type: {str(type(next_target))}") target = next_target + + +def _load_and_validate_josn(payload: str) -> dict[str, JsonType]: + """Load payload as JSON and validate it is of type dict.""" + json_payload = json.loads(payload) + assert isinstance(json_payload, dict) + return json_payload diff --git a/tests/slsa_analyzer/test_analyzer.py b/tests/slsa_analyzer/test_analyzer.py index 3d305590e..e5f840ba6 100644 --- a/tests/slsa_analyzer/test_analyzer.py +++ b/tests/slsa_analyzer/test_analyzer.py @@ -13,7 +13,7 @@ from macaron.config.target_config import Configuration 
from macaron.errors import InvalidPURLError -from macaron.slsa_analyzer.analyzer import Analyzer +from macaron.slsa_analyzer.analyzer import Analyzer, InvalidAnalysisTargetError from ..macaron_testcase import MacaronTestCase @@ -154,3 +154,9 @@ def test_resolve_analysis_target_invalid_purl(config: Configuration) -> None: """Test the resolve analysis target method with invalid inputs.""" with pytest.raises(InvalidPURLError): Analyzer.parse_purl(config) + + +def test_resolve_analysis_target_no_purl_or_repository() -> None: + """Test creation of an Analysis Target when no PURL or repository path is provided.""" + with pytest.raises(InvalidAnalysisTargetError): + Analyzer.to_analysis_target(Configuration(), [], None) From bafebfcf24c5fa83c0aed543996429034c3471a5 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 13 Mar 2024 20:13:43 +1000 Subject: [PATCH 10/25] chore: add type for npm latest version response to help mypy. Signed-off-by: Ben Selwyn-Smith --- src/macaron/slsa_analyzer/package_registry/npm_registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/package_registry/npm_registry.py b/src/macaron/slsa_analyzer/package_registry/npm_registry.py index e62185023..7786d0e1b 100644 --- a/src/macaron/slsa_analyzer/package_registry/npm_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/npm_registry.py @@ -231,13 +231,13 @@ def get_latest_version(self, namespace: str | None, name: str) -> str | None: return None json_data = json.loads(response.text) - version = json_data.get("version") + version: str | None = json_data.get("version") if not version: logger.debug("No version found in response from NPM server.") return None logger.debug("Found version for NPM artifact: %s", version) - return version if isinstance(version, str) else str(version) + return version class NPMAttestationAsset(NamedTuple): From e5e80d901f52d3cfd94ada19576a8e49010f1ee0 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 
14 Mar 2024 09:05:21 +1000 Subject: [PATCH 11/25] chore: remove duplicate test; update test for analysis target changes. Signed-off-by: Ben Selwyn-Smith --- tests/repo_finder/test_repo_finder.py | 65 --------------------------- tests/slsa_analyzer/test_analyzer.py | 5 --- 2 files changed, 70 deletions(-) diff --git a/tests/repo_finder/test_repo_finder.py b/tests/repo_finder/test_repo_finder.py index 03b86c4d5..ba0bc2b20 100644 --- a/tests/repo_finder/test_repo_finder.py +++ b/tests/repo_finder/test_repo_finder.py @@ -6,74 +6,9 @@ from pathlib import Path import pytest -from packageurl import PackageURL from macaron.config.defaults import load_defaults -from macaron.config.target_config import Configuration from macaron.repo_finder.repo_finder_java import JavaRepoFinder -from macaron.slsa_analyzer.analyzer import Analyzer - - -@pytest.mark.parametrize( - ("config", "available_domains", "expect"), - [ - ( - Configuration({"purl": ""}), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget(parsed_purl=None, repo_path="", branch="", digest=""), - ), - ( - Configuration({"purl": "pkg:github.com/apache/maven"}), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget( - parsed_purl=PackageURL.from_string("pkg:github.com/apache/maven"), - repo_path="https://github.com/apache/maven", - branch="", - digest="", - ), - ), - ( - Configuration({"purl": "", "path": "https://github.com/apache/maven"}), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget( - parsed_purl=None, repo_path="https://github.com/apache/maven", branch="", digest="" - ), - ), - ( - Configuration({"purl": "pkg:maven/apache/maven", "path": "https://github.com/apache/maven"}), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget( - parsed_purl=PackageURL.from_string("pkg:maven/apache/maven"), - repo_path="https://github.com/apache/maven", - branch="", - digest="", - ), - ), - ( - Configuration( - { - "purl": 
"pkg:maven/apache/maven", - "path": "https://github.com/apache/maven", - "branch": "master", - "digest": "abcxyz", - } - ), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget( - parsed_purl=PackageURL.from_string("pkg:maven/apache/maven"), - repo_path="https://github.com/apache/maven", - branch="master", - digest="abcxyz", - ), - ), - ], -) -def test_resolve_analysis_target( - config: Configuration, available_domains: list[str], expect: Analyzer.AnalysisTarget -) -> None: - """Test the resolve analysis target method with valid inputs.""" - parsed_purl = Analyzer.parse_purl(config) - assert Analyzer.to_analysis_target(config, available_domains, parsed_purl) == expect @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/test_analyzer.py b/tests/slsa_analyzer/test_analyzer.py index e5f840ba6..18e6eae59 100644 --- a/tests/slsa_analyzer/test_analyzer.py +++ b/tests/slsa_analyzer/test_analyzer.py @@ -48,11 +48,6 @@ def test_resolve_local_path(self) -> None: @pytest.mark.parametrize( ("config", "available_domains", "expect"), [ - ( - Configuration({"purl": ""}), - ["github.com", "gitlab.com", "bitbucket.org"], - Analyzer.AnalysisTarget(parsed_purl=None, repo_path="", branch="", digest=""), - ), ( Configuration({"purl": "pkg:github.com/apache/maven"}), ["github.com", "gitlab.com", "bitbucket.org"], From 77a88bbb2554f148e6b0fa5aa9200ac3cac67d80 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 14 Mar 2024 12:38:57 +1000 Subject: [PATCH 12/25] chore: minor fix. 
Signed-off-by: Ben Selwyn-Smith --- tests/repo_finder/test_provenance_extractor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index e8efffd49..e98bee8bc 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -19,7 +19,7 @@ @pytest.fixture(name="slsa_v1_gcb_1_provenance") def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and sourceToBuild.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v1", @@ -49,7 +49,7 @@ def slsa_v1_gcb_1_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v1_gcb_2_provenance") def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type gcb and configSource.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v1", @@ -81,7 +81,7 @@ def slsa_v1_gcb_2_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v1_github_provenance") def slsa_v1_github_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v1 provenance using build type GitHub.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v1", @@ -116,7 +116,7 @@ def slsa_v1_github_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v02_provenance") def slsa_v02_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v02 provenance.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v0.1", @@ -140,7 +140,7 @@ def slsa_v02_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="slsa_v01_provenance") def slsa_v01_provenance_() -> dict[str, JsonType]: """Return a valid SLSA v01 
provenance.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v0.1", @@ -170,7 +170,7 @@ def slsa_v01_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="witness_gitlab_provenance") def witness_gitlab_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitLab attestation.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v0.1", @@ -201,7 +201,7 @@ def witness_gitlab_provenance_() -> dict[str, JsonType]: @pytest.fixture(name="witness_github_provenance") def witness_github_provenance_() -> dict[str, JsonType]: """Return a Witness v0.1 provenance with a GitHub attestation.""" - return _load_and_validate_josn( + return _load_and_validate_json( """ { "_type": "https://in-toto.io/Statement/v0.1", @@ -467,7 +467,7 @@ def _json_modify(entry: dict[str, JsonType], keys: list[str], new_value: JsonTyp target = next_target -def _load_and_validate_josn(payload: str) -> dict[str, JsonType]: +def _load_and_validate_json(payload: str) -> dict[str, JsonType]: """Load payload as JSON and validate it is of type dict.""" json_payload = json.loads(payload) assert isinstance(json_payload, dict) From cfd9a3e7c8eb0659416f37c4310b8ae964df0a0d Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 14 Mar 2024 14:15:15 +1000 Subject: [PATCH 13/25] chore: update comment to reflect immediate proceedings only; refactor json_extract function. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 38 +++++++++++-------- src/macaron/slsa_analyzer/analyzer.py | 2 +- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 409f45538..7914797d5 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -82,11 +82,15 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: raise ProvenanceExtractionException("Indexed material list entry is invalid.") uri = json_extract(material, ["uri"], str) + repo = _clean_spdx(uri) digest_set = json_extract(material, ["digest"], dict) commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) + if not commit: + raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") + return repo, commit @@ -99,11 +103,16 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: # The repository URL and commit are stored within the predicate -> invocation -> configSource object. 
# See https://slsa.dev/spec/v0.2/provenance uri = json_extract(predicate, ["invocation", "configSource", "uri"], str) + if not uri: + raise ProvenanceExtractionException("Failed to extract repository URL from provenance.") repo = _clean_spdx(uri) digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict) commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) + if not commit: + raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") + return repo, commit @@ -223,15 +232,15 @@ class JsonExtractionException(MacaronError): T = TypeVar("T", bound=JsonType) -def json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> T: +def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T: """Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary. - The value must be truthy, and be of the passed type. + The value must be of the passed type. Parameters ---------- - entry: dict[str, JsonType] - An entry point into the JSON structure. + entry: JsonType + An entry point into a JSON structure. keys: list[str] The list of depth-sequential keys within the JSON. type: type[T] @@ -248,16 +257,15 @@ def json_extract(entry: dict[str, JsonType], keys: list[str], type_: type[T]) -> Raised if an error occurs while searching for or validating the value. 
""" target = entry + for index, key in enumerate(keys): + if not isinstance(target, dict): + raise JsonExtractionException(f"Expect the value .{'.'.join(keys[:index])} to be a dict.") if key not in target: - raise JsonExtractionException(f"JSON key not found: {key}") - next_target = target[key] - if index == len(keys) - 1: - if next_target and isinstance(next_target, type_): - return next_target - else: - if not isinstance(next_target, dict): - raise JsonExtractionException(f"Cannot extract value from non-dict type: {str(type(next_target))}") - target = next_target - - raise JsonExtractionException(f"Failed to find '{' > '.join(keys)}' as type '{type_}' in JSON dictionary.") + raise JsonExtractionException(f"JSON key '{key}' not found in .{'.'.join(keys[:index])}.") + target = target[key] + + if isinstance(target, type_): + return target + + raise JsonExtractionException(f"Expect the value .{'.'.join(keys)} to be of type '{type_}'.") diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 00f0aca91..5c0a38758 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -671,7 +671,7 @@ def to_analysis_target( digest=digest, ) - # The commit was not found from provenance. Proceed with Repo and Commit Finder. + # The commit was not found from provenance. Proceed with Repo Finder. converted_repo_path = repo_finder.to_repo_path(parsed_purl, available_domains) if converted_repo_path is None: # Try to find repo from PURL From 488f64a0baaa66c8ffad79a6821cc55883fd9b45 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 18 Mar 2024 11:02:07 +1000 Subject: [PATCH 14/25] chore: restrict in-toto digest set algorithms; refactor provenance extractor tests. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 1 - src/macaron/slsa_analyzer/analyzer.py | 4 +- .../provenance/intoto/v01/__init__.py | 22 +---- .../provenance/intoto/v1/__init__.py | 23 +---- .../repo_finder/test_provenance_extractor.py | 97 +++++++++---------- 5 files changed, 55 insertions(+), 92 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 7914797d5..cca65559d 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -204,7 +204,6 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor The DigestSet is an in-toto object that maps algorithm types to commit hashes (digests). """ - # TODO decide on a preference for which algorithm to accept. if len(digest_set.keys()) > 1: logger.debug("DigestSet contains multiple algorithms: %s", digest_set.keys()) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 5c0a38758..c3604d080 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -586,9 +586,9 @@ def parse_purl(config: Configuration) -> PackageURL | None: # Therefore, their true types are ``str``, and an empty string indicates that the input value is not provided. # The purl might be a PackageURL type, a string, or None, which should be reduced down to an optional # PackageURL type. 
- if config.get_value("purl") is None or config.get_value("purl") == "": - return None purl = config.get_value("purl") + if purl is None or purl == "": + return None if isinstance(purl, PackageURL): return purl try: diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index 1833e41be..fb8a83963 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -10,29 +10,11 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The full list of cryptographic algorithms supported in in-toto v0.1 provenance. +# The list of cryptographic algorithms supported in in-toto v0.1 provenance. # These are used as keys within the digest set of the resource descriptors within the subject. -# For v0.1 see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet +# For the full v0.1 list see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet VALID_ALGORITHMS = [ - "sha256", - "sha224", - "sha384", - "sha512", - "sha512_224", - "sha512_256", - "sha3_224", - "sha3_256", - "sha3_384", - "sha3_512", - "shake128", - "shake256", - "blake2b", - "blake2s", - "ripemd160", - "sm3", - "gost", "sha1", - "md5", ] diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py index 9f1b95eb7..c6cbf75cd 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py @@ -11,29 +11,12 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The full list of cryptographic algorithms supported in in-toto v1 provenance. +# The list of cryptographic algorithms supported in in-toto v1 provenance. 
# These are used as keys within the digest set of the resource descriptors within the subject. -# For v1 see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md +# For the full v1 list see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md VALID_ALGORITHMS = [ - "sha256", - "sha224", - "sha384", - "sha512", - "sha512_224", - "sha512_256", - "sha3_224", - "sha3_256", - "sha3_384", - "sha3_512", - "shake128", - "shake256", - "blake2b", - "blake2s", - "ripemd160", - "sm3", - "gost", "sha1", - "md5", + "gitCommit", # This special git value is equivalent to SHA-1 or SHA-256. See the v1 spec for more information. ] diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index e98bee8bc..2c502d891 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -7,7 +7,6 @@ import pytest from macaron.repo_finder.provenance_extractor import ( - JsonExtractionException, ProvenanceExtractionException, extract_repo_and_commit_from_provenance, json_extract, @@ -157,7 +156,7 @@ def slsa_v01_provenance_() -> dict[str, JsonType]: { "uri": "git+https://github.com/oracle/macaron@refs/heads/main", "digest": { - "sha256": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" + "sha1": "51aa22a42ec1bffa71518041a6a6d42d40bf50f0" } } ] @@ -245,7 +244,7 @@ def test_slsa_v1_gcb_1_is_valid( slsa_v1_gcb_1_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test valid SLSA v1 provenance with build type gcb and sourceToBuild.""" - _perform_provenance_comparison(slsa_v1_gcb_1_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_1_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -262,16 +261,16 @@ def test_slsa_v1_gcb_1_is_invalid( slsa_v1_gcb_1_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly 
modified SLSA v1 provenance with build type gcb and sourceToBuild.""" - _json_modify(slsa_v1_gcb_1_provenance, keys, new_value) + assert _json_modify(slsa_v1_gcb_1_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v1_gcb_1_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_1_provenance) def test_slsa_v1_gcb_2_is_valid( slsa_v1_gcb_2_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test valid SLSA v1 provenance with build type gcb and configSource.""" - _perform_provenance_comparison(slsa_v1_gcb_2_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_2_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -286,16 +285,16 @@ def test_slsa_v1_gcb_2_is_invalid( slsa_v1_gcb_2_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly modified SLSA v1 provenance with build type gcb and configSource.""" - _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) + assert _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v1_gcb_2_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_2_provenance) def test_slsa_v1_github_is_valid( slsa_v1_github_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test valid SLSA v1 provenance with build type GitHub.""" - _perform_provenance_comparison(slsa_v1_github_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(slsa_v1_github_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -310,16 +309,16 @@ def test_slsa_v1_github_is_invalid( slsa_v1_github_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly modified SLSA v1 provenance 
with build type GitHub.""" - _json_modify(slsa_v1_github_provenance, keys, new_value) + assert _json_modify(slsa_v1_github_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v1_github_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v1_github_provenance) def test_slsa_v02_is_valid( slsa_v02_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test SLSA v0.2 provenance.""" - _perform_provenance_comparison(slsa_v02_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(slsa_v02_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -334,16 +333,16 @@ def test_slsa_v02_is_valid( ) def test_slsa_v02_is_invalid(slsa_v02_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: """Test invalidly modified SLSA v0.2 provenance.""" - _json_modify(slsa_v02_provenance, keys, new_value) + assert _json_modify(slsa_v02_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v02_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v02_provenance) def test_slsa_v01_is_valid( slsa_v01_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test valid SLSA v0.1 provenance.""" - _perform_provenance_comparison(slsa_v01_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -357,21 +356,21 @@ def test_slsa_v01_is_invalid(slsa_v01_provenance: dict[str, JsonType], new_value """Test invalidly modified SLSA v0.1 provenance.""" materials = json_extract(slsa_v01_provenance, ["predicate", "materials"], list) material_index = json_extract(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], int) - _json_modify(materials[material_index], 
["uri"], new_value) + assert _json_modify(materials[material_index], ["uri"], new_value) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v01_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) def test_slsa_v01_invalid_material_index(slsa_v01_provenance: dict[str, JsonType]) -> None: """Test the SLSA v0.1 provenance with an invalid materials index.""" - _json_modify(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], 10) + assert _json_modify(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], 10) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(slsa_v01_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) def test_witness_gitlab_is_valid(witness_gitlab_provenance: dict[str, JsonType]) -> None: """Test valid Witness v0.1 GitLab provenance.""" - _perform_provenance_comparison( + _test_extract_repo_and_commit_from_provenance( witness_gitlab_provenance, "https://gitlab.com/tinyMediaManager/tinyMediaManager", "cf6080a92d1c748ba5f05ea16529e05e5c641a49", @@ -382,7 +381,7 @@ def test_witness_github_is_valid( witness_github_provenance: dict[str, JsonType], target_repository: str, target_commit: str ) -> None: """Test valid Witness v0.1 GitHub provenance.""" - _perform_provenance_comparison(witness_github_provenance, target_repository, target_commit) + _test_extract_repo_and_commit_from_provenance(witness_github_provenance, target_repository, target_commit) @pytest.mark.parametrize( @@ -399,17 +398,17 @@ def test_witness_github_is_invalid( ) -> None: """Test invalidly modified Witness v0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) - _json_modify(attestations[attestation_index], keys, new_value) + assert _json_modify(attestations[attestation_index], keys, new_value) with pytest.raises(ProvenanceExtractionException): - 
_perform_provenance_comparison(witness_github_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(witness_github_provenance) def test_witness_github_remove_attestation(witness_github_provenance: dict[str, JsonType]) -> None: """Test removing Git attestation from Witness V0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) - _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) + assert _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(witness_github_provenance, "", "") + _test_extract_repo_and_commit_from_provenance(witness_github_provenance) @pytest.mark.parametrize( @@ -423,14 +422,14 @@ def test_witness_github_remove_attestation(witness_github_provenance: dict[str, ) def test_invalid_type_payloads(type_: str, predicate_type: str) -> None: """Test payloads with invalid type combinations.""" - payload_text = '{ "_type": ' + f'"{type_}",' + ' "predicateType": ' + f'"{predicate_type}",' - payload_text = f"{payload_text}" + '"subject": [], "predicate": {} }' - payload = json.loads(payload_text) + payload: dict[str, JsonType] = {"_type": type_, "predicateType": predicate_type, "subject": [], "predicate": {}} with pytest.raises(ProvenanceExtractionException): - _perform_provenance_comparison(payload, "", "") + _test_extract_repo_and_commit_from_provenance(payload) -def _perform_provenance_comparison(payload: dict[str, JsonType], expected_repo: str, expected_commit: str) -> None: +def _test_extract_repo_and_commit_from_provenance( + payload: dict[str, JsonType], expected_repo: str = "", expected_commit: str = "" +) -> None: """Accept a provenance and extraction function, assert the extracted values match the expected ones.""" provenance = validate_intoto_payload(payload) repo, commit = 
extract_repo_and_commit_from_provenance(provenance) @@ -438,7 +437,7 @@ def _perform_provenance_comparison(payload: dict[str, JsonType], expected_repo: assert expected_commit == commit -def _json_modify(entry: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: +def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> bool: """Modify the value found by following the list of depth-sequential keys inside the passed JSON dictionary. The found value will be overwritten by the new_value parameter. @@ -446,25 +445,25 @@ def _json_modify(entry: dict[str, JsonType], keys: list[str], new_value: JsonTyp If the final key does not exist, it will be created as new_value. """ target = entry - for index, key in enumerate(keys): + last_target = None + + for key in keys: + if not isinstance(target, dict): + return False if key not in target: - if index == len(keys) - 1: - # Add key. - target[key] = new_value - return - raise JsonExtractionException(f"JSON key not found: {key}") - next_target = target[key] - if index == len(keys) - 1: - if new_value is None: - # Remove value. - del target[key] - else: - # Replace value - target[key] = new_value - else: - if not isinstance(next_target, dict): - raise JsonExtractionException(f"Cannot extract value from non-dict type: {str(type(next_target))}") - target = next_target + return False + last_target = target + target = target[key] + + if last_target is None: + return False + + if new_value is None: + del last_target[keys[len(keys) - 1]] + else: + last_target[keys[len(keys) - 1]] = new_value + + return True def _load_and_validate_json(payload: str) -> dict[str, JsonType]: From 7d995cf7a99001fc905bb2b6d4ccf98c3f983a9e Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 18 Mar 2024 11:06:18 +1000 Subject: [PATCH 15/25] chore: improve digest set debug information. 
Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/provenance_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index cca65559d..0d84c3f64 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -212,7 +212,7 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor value = digest_set.get(key) if isinstance(value, str): return value - raise ProvenanceExtractionException("No valid digest in digest set.") + raise ProvenanceExtractionException(f"No valid digest in digest set: {digest_set.keys()} not in {valid_algorithms}") def _clean_spdx(uri: str) -> str: From 9a826389cd59e927460b3ac72992e6203be98a9b Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Mon, 18 Mar 2024 11:18:19 +1000 Subject: [PATCH 16/25] chore: separate SLSA extraction digest set algorithms from in-toto acceptance list. 
Signed-off-by: Ben Selwyn-Smith --- .../repo_finder/provenance_extractor.py | 12 ++++++---- .../provenance/intoto/v01/__init__.py | 22 +++++++++++++++++-- .../provenance/intoto/v1/__init__.py | 22 +++++++++++++++++-- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 0d84c3f64..fee18a90c 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -6,7 +6,6 @@ from typing import TypeVar from macaron.errors import MacaronError -from macaron.slsa_analyzer.provenance import intoto from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload from macaron.util import JsonType @@ -17,6 +16,11 @@ class ProvenanceExtractionException(MacaronError): """When there is an error while extracting from provenance.""" +SLSA_V01_DIGEST_SET_ALGORITHMS = ["sha1"] +SLSA_V02_DIGEST_SET_ALGORITHMS = ["sha1"] +SLSA_V1_DIGEST_SET_ALGORITHMS = ["sha1", "gitCommit"] + + def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str, str]: """Extract the repository and commit metadata from the passed provenance payload. 
@@ -86,7 +90,7 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: repo = _clean_spdx(uri) digest_set = json_extract(material, ["digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") @@ -108,7 +112,7 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: repo = _clean_spdx(uri) digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, intoto.v01.VALID_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V02_DIGEST_SET_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") @@ -149,7 +153,7 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: if url != repo: continue digest_set = json_extract(dep, ["digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, intoto.v1.VALID_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index fb8a83963..1833e41be 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -10,11 +10,29 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The list of cryptographic algorithms supported in in-toto v0.1 provenance. +# The full list of cryptographic algorithms supported in in-toto v0.1 provenance. 
# These are used as keys within the digest set of the resource descriptors within the subject. -# For the full v0.1 list see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet +# For v0.1 see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet VALID_ALGORITHMS = [ + "sha256", + "sha224", + "sha384", + "sha512", + "sha512_224", + "sha512_256", + "sha3_224", + "sha3_256", + "sha3_384", + "sha3_512", + "shake128", + "shake256", + "blake2b", + "blake2s", + "ripemd160", + "sm3", + "gost", "sha1", + "md5", ] diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py index c6cbf75cd..fc25bcd07 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py @@ -11,11 +11,29 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The list of cryptographic algorithms supported in in-toto v1 provenance. +# The full list of cryptographic algorithms supported in in-toto v1 provenance. # These are used as keys within the digest set of the resource descriptors within the subject. -# For the full v1 list see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md +# For v1 see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md VALID_ALGORITHMS = [ + "sha256", + "sha224", + "sha384", + "sha512", + "sha512_224", + "sha512_256", + "sha3_224", + "sha3_256", + "sha3_384", + "sha3_512", + "shake128", + "shake256", + "blake2b", + "blake2s", + "ripemd160", + "sm3", + "gost", "sha1", + "md5", "gitCommit", # This special git value is equivalent to SHA-1 or SHA-256. See the v1 spec for more information. 
] From 673b5620fd21c30a74085e5f7361253fd5e97454 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 19 Mar 2024 09:24:27 +1000 Subject: [PATCH 17/25] chore: address PR feedback. Signed-off-by: Ben Selwyn-Smith --- .../provenance/witness/__init__.py | 2 - .../repo_finder/test_provenance_extractor.py | 40 +++++++------------ 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/src/macaron/slsa_analyzer/provenance/witness/__init__.py b/src/macaron/slsa_analyzer/provenance/witness/__init__.py index cbe1afe8e..408fb31ca 100644 --- a/src/macaron/slsa_analyzer/provenance/witness/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/witness/__init__.py @@ -136,8 +136,6 @@ def extract_witness_provenance_subjects(witness_payload: InTotoPayload) -> set[W dict[str, str] A dictionary in which each key is a subject name and each value is the corresponding SHA256 digest. """ - # TODO: add support for in-toto v1 provenances. - if isinstance(witness_payload, InTotoV01Payload): subjects = witness_payload.statement["subject"] subject_digests = set() diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index 2c502d891..e2db85c94 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -261,7 +261,7 @@ def test_slsa_v1_gcb_1_is_invalid( slsa_v1_gcb_1_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly modified SLSA v1 provenance with build type gcb and sourceToBuild.""" - assert _json_modify(slsa_v1_gcb_1_provenance, keys, new_value) + _json_modify(slsa_v1_gcb_1_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_1_provenance) @@ -285,7 +285,7 @@ def test_slsa_v1_gcb_2_is_invalid( slsa_v1_gcb_2_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly modified SLSA v1 provenance with 
build type gcb and configSource.""" - assert _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) + _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_2_provenance) @@ -309,7 +309,7 @@ def test_slsa_v1_github_is_invalid( slsa_v1_github_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType ) -> None: """Test invalidly modified SLSA v1 provenance with build type GitHub.""" - assert _json_modify(slsa_v1_github_provenance, keys, new_value) + _json_modify(slsa_v1_github_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v1_github_provenance) @@ -333,7 +333,7 @@ def test_slsa_v02_is_valid( ) def test_slsa_v02_is_invalid(slsa_v02_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: """Test invalidly modified SLSA v0.2 provenance.""" - assert _json_modify(slsa_v02_provenance, keys, new_value) + _json_modify(slsa_v02_provenance, keys, new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v02_provenance) @@ -356,14 +356,14 @@ def test_slsa_v01_is_invalid(slsa_v01_provenance: dict[str, JsonType], new_value """Test invalidly modified SLSA v0.1 provenance.""" materials = json_extract(slsa_v01_provenance, ["predicate", "materials"], list) material_index = json_extract(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], int) - assert _json_modify(materials[material_index], ["uri"], new_value) + _json_modify(materials[material_index], ["uri"], new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) def test_slsa_v01_invalid_material_index(slsa_v01_provenance: dict[str, JsonType]) -> None: """Test the SLSA v0.1 provenance with an invalid materials index.""" - assert _json_modify(slsa_v01_provenance, 
["predicate", "recipe", "definedInMaterial"], 10) + _json_modify(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], 10) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) @@ -398,7 +398,7 @@ def test_witness_github_is_invalid( ) -> None: """Test invalidly modified Witness v0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) - assert _json_modify(attestations[attestation_index], keys, new_value) + _json_modify(attestations[attestation_index], keys, new_value) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(witness_github_provenance) @@ -406,7 +406,7 @@ def test_witness_github_is_invalid( def test_witness_github_remove_attestation(witness_github_provenance: dict[str, JsonType]) -> None: """Test removing Git attestation from Witness V0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) - assert _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) + _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) with pytest.raises(ProvenanceExtractionException): _test_extract_repo_and_commit_from_provenance(witness_github_provenance) @@ -440,28 +440,16 @@ def _test_extract_repo_and_commit_from_provenance( def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> bool: """Modify the value found by following the list of depth-sequential keys inside the passed JSON dictionary. - The found value will be overwritten by the new_value parameter. - If new_value is None, the value will be removed. - If the final key does not exist, it will be created as new_value. + The found value will be overwritten by the `new_value` parameter. + If `new_value` is `None`, the value will be removed. 
+ If the final key does not exist, it will be created as `new_value`. """ - target = entry - last_target = None - - for key in keys: - if not isinstance(target, dict): - return False - if key not in target: - return False - last_target = target - target = target[key] - - if last_target is None: - return False + target: dict[str, JsonType] = json_extract(entry, keys[:-1], dict) if new_value is None: - del last_target[keys[len(keys) - 1]] + del target[keys[-1]] else: - last_target[keys[len(keys) - 1]] = new_value + target[keys[-1]] = new_value return True From c6481f654ca86391d5d64bcd7662d5421251cfe4 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 19 Mar 2024 09:28:19 +1000 Subject: [PATCH 18/25] chore: minor fix. Signed-off-by: Ben Selwyn-Smith --- tests/repo_finder/test_provenance_extractor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index e2db85c94..aa485f892 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -437,7 +437,7 @@ def _test_extract_repo_and_commit_from_provenance( assert expected_commit == commit -def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> bool: +def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> None: """Modify the value found by following the list of depth-sequential keys inside the passed JSON dictionary. The found value will be overwritten by the `new_value` parameter. 
@@ -451,8 +451,6 @@ def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> bool: else: target[keys[-1]] = new_value - return True - def _load_and_validate_json(payload: str) -> dict[str, JsonType]: """Load payload as JSON and validate it is of type dict.""" From c48c6508d687a210dff3a5a92cc017dd41e52e10 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 19 Mar 2024 16:05:39 +1000 Subject: [PATCH 19/25] chore: unify digest set validation across in-toto versions. Signed-off-by: Ben Selwyn-Smith --- src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index 1833e41be..52d30c81d 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -186,7 +186,9 @@ def is_valid_digest_set(digest: dict[str, JsonType]) -> TypeGuard[dict[str, str] ``True`` if the digest set is valid according to the spec, in which case its type is narrowed to a ``dict[str, str]``; ``False`` otherwise. """ - for value in digest.values(): - if not isinstance(value, str): + for key in digest: + if key not in VALID_ALGORITHMS: + return False + if not isinstance(digest[key], str): return False return True From 2267b185bfbfa5c4fd01a4ed1fef8c0df0000570 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 21 Mar 2024 09:16:40 +1000 Subject: [PATCH 20/25] chore: specify Git in SLSA digest set algorithm list. 
Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/provenance_extractor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index fee18a90c..048e1c09c 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -16,9 +16,9 @@ class ProvenanceExtractionException(MacaronError): """When there is an error while extracting from provenance.""" -SLSA_V01_DIGEST_SET_ALGORITHMS = ["sha1"] -SLSA_V02_DIGEST_SET_ALGORITHMS = ["sha1"] -SLSA_V1_DIGEST_SET_ALGORITHMS = ["sha1", "gitCommit"] +SLSA_V01_DIGEST_SET_GIT_ALGORITHMS = ["sha1"] +SLSA_V02_DIGEST_SET_GIT_ALGORITHMS = ["sha1"] +SLSA_V1_DIGEST_SET_GIT_ALGORITHMS = ["sha1", "gitCommit"] def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str, str]: @@ -90,7 +90,7 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: repo = _clean_spdx(uri) digest_set = json_extract(material, ["digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_GIT_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") @@ -112,7 +112,7 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: repo = _clean_spdx(uri) digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, SLSA_V02_DIGEST_SET_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V02_DIGEST_SET_GIT_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") @@ -153,7 +153,7 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: if url != repo: continue digest_set = json_extract(dep, 
["digest"], dict) - commit = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_ALGORITHMS) + commit = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_GIT_ALGORITHMS) if not commit: raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") From b545b67b38459c9aa59d4bbdc3365870060f1f34 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 21 Mar 2024 10:45:20 +1000 Subject: [PATCH 21/25] chore: remove algorithm validation in digest set. Signed-off-by: Ben Selwyn-Smith --- .../provenance/intoto/v01/__init__.py | 27 ------------------ .../provenance/intoto/v1/__init__.py | 28 ------------------- 2 files changed, 55 deletions(-) diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index 52d30c81d..95fc3b304 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -10,31 +10,6 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The full list of cryptographic algorithms supported in in-toto v0.1 provenance. -# These are used as keys within the digest set of the resource descriptors within the subject. -# For v0.1 see: https://github.com/in-toto/attestation/blob/main/spec/v0.1.0/field_types.md#DigestSet -VALID_ALGORITHMS = [ - "sha256", - "sha224", - "sha384", - "sha512", - "sha512_224", - "sha512_256", - "sha3_224", - "sha3_256", - "sha3_384", - "sha3_512", - "shake128", - "shake256", - "blake2b", - "blake2s", - "ripemd160", - "sm3", - "gost", - "sha1", - "md5", -] - class InTotoV01Statement(TypedDict): """An in-toto version 0.1 statement. @@ -187,8 +162,6 @@ def is_valid_digest_set(digest: dict[str, JsonType]) -> TypeGuard[dict[str, str] is narrowed to a ``dict[str, str]``; ``False`` otherwise. 
""" for key in digest: - if key not in VALID_ALGORITHMS: - return False if not isinstance(digest[key], str): return False return True diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py index fc25bcd07..3ffe08bd6 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py @@ -11,32 +11,6 @@ from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.util import JsonType -# The full list of cryptographic algorithms supported in in-toto v1 provenance. -# These are used as keys within the digest set of the resource descriptors within the subject. -# For v1 see: https://github.com/in-toto/attestation/blob/main/spec/v1/digest_set.md -VALID_ALGORITHMS = [ - "sha256", - "sha224", - "sha384", - "sha512", - "sha512_224", - "sha512_256", - "sha3_224", - "sha3_256", - "sha3_384", - "sha3_512", - "shake128", - "shake256", - "blake2b", - "blake2s", - "ripemd160", - "sm3", - "gost", - "sha1", - "md5", - "gitCommit", # This special git value is equivalent to SHA-1 or SHA-256. See the v1 spec for more information. -] - class InTotoV1Statement(TypedDict): """An in-toto version 1 statement. @@ -191,8 +165,6 @@ def is_valid_digest_set(digest: JsonType) -> bool: if not isinstance(digest, dict): return False for key in digest: - if key not in VALID_ALGORITHMS: - return False if not isinstance(digest[key], str): return False return True From 26de80605e50223cf56512be9ef8760dbb178cb5 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 21 Mar 2024 16:46:27 +1000 Subject: [PATCH 22/25] chore: Move JSON utility function; Move errors to error script. 
Signed-off-by: Ben Selwyn-Smith --- src/macaron/errors.py | 8 ++ .../repo_finder/provenance_extractor.py | 90 +++++-------------- src/macaron/slsa_analyzer/analyzer.py | 8 +- .../repo_finder/test_provenance_extractor.py | 26 +++--- 4 files changed, 43 insertions(+), 89 deletions(-) diff --git a/src/macaron/errors.py b/src/macaron/errors.py index 5e892e1a6..a98a3bef5 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -58,3 +58,11 @@ class InvalidHTTPResponseError(MacaronError): class CheckRegistryError(MacaronError): """The Check Registry Error class.""" + + +class ProvenanceError(MacaronError): + """When there is an error while extracting from provenance.""" + + +class JsonError(MacaronError): + """When there is an error while extracting from JSON.""" diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index 048e1c09c..c30376a34 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -3,19 +3,15 @@ """This module contains methods for extracting repository and commit metadata from provenance files.""" import logging -from typing import TypeVar -from macaron.errors import MacaronError +from macaron.errors import JsonError, ProvenanceError +from macaron.json_tools import json_extract from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload from macaron.util import JsonType logger: logging.Logger = logging.getLogger(__name__) -class ProvenanceExtractionException(MacaronError): - """When there is an error while extracting from provenance.""" - - SLSA_V01_DIGEST_SET_GIT_ALGORITHMS = ["sha1"] SLSA_V02_DIGEST_SET_GIT_ALGORITHMS = ["sha1"] SLSA_V1_DIGEST_SET_GIT_ALGORITHMS = ["sha1", "gitCommit"] @@ -36,7 +32,7 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str Raises ------ - ProvenanceExtractionException + ProvenanceError If the extraction process fails for any 
reason. """ repo = "" @@ -53,9 +49,9 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str repo, commit = _extract_from_slsa_v01(payload) if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1": repo, commit = _extract_from_witness_provenance(payload) - except JsonExtractionException as error: + except JsonError as error: logger.debug(error) - raise ProvenanceExtractionException("JSON exception while extracting from provenance.") from error + raise ProvenanceError("JSON exception while extracting from provenance.") from error if not repo or not commit: msg = ( @@ -63,7 +59,7 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str f"predicate_type {predicate_type}, in-toto {str(type(payload))}." ) logger.debug(msg) - raise ProvenanceExtractionException(msg) + raise ProvenanceError(msg) logger.debug("Extracted repo and commit from provenance: %s, %s", repo, commit) return repo, commit @@ -73,17 +69,17 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v01 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - raise ProvenanceExtractionException("No predicate in payload statement.") + raise ProvenanceError("No predicate in payload statement.") # The repository URL and commit are stored inside an entry in the list of predicate -> materials. # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry. 
list_index = json_extract(predicate, ["recipe", "definedInMaterial"], int) material_list = json_extract(predicate, ["materials"], list) if list_index >= len(material_list): - raise ProvenanceExtractionException("Material list index outside of material list bounds.") + raise ProvenanceError("Material list index outside of material list bounds.") material = material_list[list_index] if not material or not isinstance(material, dict): - raise ProvenanceExtractionException("Indexed material list entry is invalid.") + raise ProvenanceError("Indexed material list entry is invalid.") uri = json_extract(material, ["uri"], str) @@ -93,7 +89,7 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]: commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_GIT_ALGORITHMS) if not commit: - raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") + raise ProvenanceError("Failed to extract commit hash from provenance.") return repo, commit @@ -102,20 +98,20 @@ def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v02 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - raise ProvenanceExtractionException("No predicate in payload statement.") + raise ProvenanceError("No predicate in payload statement.") # The repository URL and commit are stored within the predicate -> invocation -> configSource object. 
# See https://slsa.dev/spec/v0.2/provenance uri = json_extract(predicate, ["invocation", "configSource", "uri"], str) if not uri: - raise ProvenanceExtractionException("Failed to extract repository URL from provenance.") + raise ProvenanceError("Failed to extract repository URL from provenance.") repo = _clean_spdx(uri) digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict) commit = _extract_commit_from_digest_set(digest_set, SLSA_V02_DIGEST_SET_GIT_ALGORITHMS) if not commit: - raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") + raise ProvenanceError("Failed to extract commit hash from provenance.") return repo, commit @@ -124,7 +120,7 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: """Extract the repository and commit metadata from the slsa v1 provenance payload.""" predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - raise ProvenanceExtractionException("No predicate in payload statement.") + raise ProvenanceError("No predicate in payload statement.") build_def = json_extract(predicate, ["buildDefinition"], dict) build_type = json_extract(build_def, ["buildType"], str) @@ -134,13 +130,13 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1": try: repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str) - except JsonExtractionException: + except JsonError: repo = json_extract(build_def, ["externalParameters", "configSource", "repository"], str) if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1": repo = json_extract(build_def, ["externalParameters", "workflow", "repository"], str) if not repo: - raise ProvenanceExtractionException("Failed to extract repository URL from provenance.") + raise ProvenanceError("Failed to extract repository URL from 
provenance.") # Extract the commit hash. commit = "" @@ -156,7 +152,7 @@ def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]: commit = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_GIT_ALGORITHMS) if not commit: - raise ProvenanceExtractionException("Failed to extract commit hash from provenance.") + raise ProvenanceError("Failed to extract commit hash from provenance.") return repo, commit @@ -179,7 +175,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st """ predicate: dict[str, JsonType] | None = payload.statement.get("predicate") if not predicate: - raise ProvenanceExtractionException("No predicate in payload statement.") + raise ProvenanceError("No predicate in payload statement.") attestations = json_extract(predicate, ["attestations"], list) commit = "" @@ -198,7 +194,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st repo = json_extract(entry, ["attestation", "projecturl"], str) if not commit or not repo: - raise ProvenanceExtractionException("Could not extract repo and commit from provenance.") + raise ProvenanceError("Could not extract repo and commit from provenance.") return repo, commit @@ -216,7 +212,7 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor value = digest_set.get(key) if isinstance(value, str): return value - raise ProvenanceExtractionException(f"No valid digest in digest set: {digest_set.keys()} not in {valid_algorithms}") + raise ProvenanceError(f"No valid digest in digest set: {digest_set.keys()} not in {valid_algorithms}") def _clean_spdx(uri: str) -> str: @@ -226,49 +222,3 @@ def _clean_spdx(uri: str) -> str: """ url, _, _ = uri.lstrip("git+").rpartition("@") return url - - -class JsonExtractionException(MacaronError): - """When there is an error while extracting from JSON.""" - - -T = TypeVar("T", bound=JsonType) - - -def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T: - 
"""Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary. - - The value must be of the passed type. - - Parameters - ---------- - entry: JsonType - An entry point into a JSON structure. - keys: list[str] - The list of depth-sequential keys within the JSON. - type: type[T] - The type to check the value against and return it as. - - Returns - ------- - T: - The found value as the type of the type parameter. - - Raises - ------ - JsonExtractionException - Raised if an error occurs while searching for or validating the value. - """ - target = entry - - for index, key in enumerate(keys): - if not isinstance(target, dict): - raise JsonExtractionException(f"Expect the value .{'.'.join(keys[:index])} to be a dict.") - if key not in target: - raise JsonExtractionException(f"JSON key '{key}' not found in .{'.'.join(keys[:index])}.") - target = target[key] - - if isinstance(target, type_): - return target - - raise JsonExtractionException(f"Expect the value .{'.'.join(keys)} to be of type '{type_}'.") diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index c3604d080..a8e90112d 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -26,6 +26,7 @@ DuplicateError, InvalidPURLError, MacaronError, + ProvenanceError, PURLNotFoundError, RepoCheckOutError, ) @@ -33,10 +34,7 @@ from macaron.output_reporter.results import Record, Report, SCMStatus from macaron.repo_finder import repo_finder from macaron.repo_finder.commit_finder import find_commit -from macaron.repo_finder.provenance_extractor import ( - ProvenanceExtractionException, - extract_repo_and_commit_from_provenance, -) +from macaron.repo_finder.provenance_extractor import extract_repo_and_commit_from_provenance from macaron.repo_finder.provenance_finder import ProvenanceFinder from macaron.slsa_analyzer import git_url from macaron.slsa_analyzer.analyze_context import AnalyzeContext @@ -660,7 
+658,7 @@ def to_analysis_target( # Try to find repository and commit via provenance. try: repo, digest = extract_repo_and_commit_from_provenance(provenance_payload) - except ProvenanceExtractionException as error: + except ProvenanceError as error: logger.debug("Failed to extract repo and commit from provenance: %s", error) if repo and digest: diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index aa485f892..1ee27aa4e 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -6,11 +6,9 @@ import pytest -from macaron.repo_finder.provenance_extractor import ( - ProvenanceExtractionException, - extract_repo_and_commit_from_provenance, - json_extract, -) +from macaron.errors import ProvenanceError +from macaron.json_tools import json_extract +from macaron.repo_finder.provenance_extractor import extract_repo_and_commit_from_provenance from macaron.slsa_analyzer.provenance.intoto import validate_intoto_payload from macaron.util import JsonType @@ -262,7 +260,7 @@ def test_slsa_v1_gcb_1_is_invalid( ) -> None: """Test invalidly modified SLSA v1 provenance with build type gcb and sourceToBuild.""" _json_modify(slsa_v1_gcb_1_provenance, keys, new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_1_provenance) @@ -286,7 +284,7 @@ def test_slsa_v1_gcb_2_is_invalid( ) -> None: """Test invalidly modified SLSA v1 provenance with build type gcb and configSource.""" _json_modify(slsa_v1_gcb_2_provenance, keys, new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v1_gcb_2_provenance) @@ -310,7 +308,7 @@ def test_slsa_v1_github_is_invalid( ) -> None: """Test invalidly modified SLSA v1 provenance with build type GitHub.""" _json_modify(slsa_v1_github_provenance, 
keys, new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v1_github_provenance) @@ -334,7 +332,7 @@ def test_slsa_v02_is_valid( def test_slsa_v02_is_invalid(slsa_v02_provenance: dict[str, JsonType], keys: list[str], new_value: JsonType) -> None: """Test invalidly modified SLSA v0.2 provenance.""" _json_modify(slsa_v02_provenance, keys, new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v02_provenance) @@ -357,14 +355,14 @@ def test_slsa_v01_is_invalid(slsa_v01_provenance: dict[str, JsonType], new_value materials = json_extract(slsa_v01_provenance, ["predicate", "materials"], list) material_index = json_extract(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], int) _json_modify(materials[material_index], ["uri"], new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) def test_slsa_v01_invalid_material_index(slsa_v01_provenance: dict[str, JsonType]) -> None: """Test the SLSA v0.1 provenance with an invalid materials index.""" _json_modify(slsa_v01_provenance, ["predicate", "recipe", "definedInMaterial"], 10) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(slsa_v01_provenance) @@ -399,7 +397,7 @@ def test_witness_github_is_invalid( """Test invalidly modified Witness v0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) _json_modify(attestations[attestation_index], keys, new_value) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(witness_github_provenance) @@ -407,7 +405,7 @@ def 
test_witness_github_remove_attestation(witness_github_provenance: dict[str, """Test removing Git attestation from Witness V0.1 GitHub provenance.""" attestations = json_extract(witness_github_provenance, ["predicate", "attestations"], list) _json_modify(witness_github_provenance, ["predicate", "attestations"], attestations[:1]) - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(witness_github_provenance) @@ -423,7 +421,7 @@ def test_witness_github_remove_attestation(witness_github_provenance: dict[str, def test_invalid_type_payloads(type_: str, predicate_type: str) -> None: """Test payloads with invalid type combinations.""" payload: dict[str, JsonType] = {"_type": type_, "predicateType": predicate_type, "subject": [], "predicate": {}} - with pytest.raises(ProvenanceExtractionException): + with pytest.raises(ProvenanceError): _test_extract_repo_and_commit_from_provenance(payload) From f8badc09396f22cf3628078b21039d0ddc7db1d7 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 21 Mar 2024 18:03:29 +1000 Subject: [PATCH 23/25] chore: move InvalidAnalysisTargetError to errors. 
Signed-off-by: Ben Selwyn-Smith --- src/macaron/errors.py | 4 ++++ src/macaron/slsa_analyzer/analyzer.py | 6 +----- tests/slsa_analyzer/test_analyzer.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/macaron/errors.py b/src/macaron/errors.py index a98a3bef5..f05540b6d 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -66,3 +66,7 @@ class ProvenanceError(MacaronError): class JsonError(MacaronError): """When there is an error while extracting from JSON.""" + + +class InvalidAnalysisTargetError(MacaronError): + """When a valid Analysis Target cannot be constructed.""" diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index a8e90112d..1687045b1 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -24,8 +24,8 @@ from macaron.errors import ( CloneError, DuplicateError, + InvalidAnalysisTargetError, InvalidPURLError, - MacaronError, ProvenanceError, PURLNotFoundError, RepoCheckOutError, @@ -1000,7 +1000,3 @@ def __init__(self, *args: Any, context: AnalyzeContext | None = None, **kwargs: """ super().__init__(*args, **kwargs) self.context: AnalyzeContext | None = context - - -class InvalidAnalysisTargetError(MacaronError): - """When a valid Analysis Target cannot be constructed.""" diff --git a/tests/slsa_analyzer/test_analyzer.py b/tests/slsa_analyzer/test_analyzer.py index 18e6eae59..f4e68f321 100644 --- a/tests/slsa_analyzer/test_analyzer.py +++ b/tests/slsa_analyzer/test_analyzer.py @@ -12,8 +12,8 @@ from packageurl import PackageURL from macaron.config.target_config import Configuration -from macaron.errors import InvalidPURLError -from macaron.slsa_analyzer.analyzer import Analyzer, InvalidAnalysisTargetError +from macaron.errors import InvalidAnalysisTargetError, InvalidPURLError +from macaron.slsa_analyzer.analyzer import Analyzer from ..macaron_testcase import MacaronTestCase From 64fb176ee6040ac2a1a5870232325e59d1248790 Mon Sep 17 
00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 22 Mar 2024 09:14:17 +1000 Subject: [PATCH 24/25] chore: add integration test for provenance extractor; add json_tools script. Signed-off-by: Ben Selwyn-Smith --- scripts/dev_scripts/integration_tests.sh | 9 ++++ src/macaron/json_tools.py | 50 ++++++++++++++++++++++ tests/e2e/defaults/disable_repo_finder.ini | 5 +++ 3 files changed, 64 insertions(+) create mode 100644 src/macaron/json_tools.py create mode 100644 tests/e2e/defaults/disable_repo_finder.ini diff --git a/scripts/dev_scripts/integration_tests.sh b/scripts/dev_scripts/integration_tests.sh index c0828fa83..8d85b8b75 100755 --- a/scripts/dev_scripts/integration_tests.sh +++ b/scripts/dev_scripts/integration_tests.sh @@ -99,6 +99,15 @@ if [[ -z "$NO_NPM_TEST" ]]; then $RUN_MACARON analyze -purl pkg:npm/@sigstore/mock@0.1.0 -rp https://github.com/sigstore/sigstore-js -b main -d ebdcfdfbdfeb9c9aeee6df53674ef230613629f5 --skip-deps || log_fail check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail + + echo -e "\n----------------------------------------------------------------------------------" + echo "semver@7.6.0: Extracting repository URL and commit from provenance while Repo Finder is disabled." 
+ echo -e "----------------------------------------------------------------------------------\n" + JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/purl/npm/semver/semver.json + JSON_RESULT=$WORKSPACE/output/reports/npm/semver/semver.json + $RUN_MACARON -dp tests/e2e/defaults/disable_repo_finder.ini analyze -purl pkg:npm/semver@7.6.0 || log_fail + + check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail fi echo -e "\n----------------------------------------------------------------------------------" diff --git a/src/macaron/json_tools.py b/src/macaron/json_tools.py new file mode 100644 index 000000000..64ad2cfd5 --- /dev/null +++ b/src/macaron/json_tools.py @@ -0,0 +1,50 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides utility functions for JSON data.""" + +from typing import TypeVar + +from macaron.errors import JsonError +from macaron.util import JsonType + +T = TypeVar("T", bound=JsonType) + + +def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T: + """Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary. + + The value must be of the passed type. + + Parameters + ---------- + entry: JsonType + An entry point into a JSON structure. + keys: list[str] + The list of depth-sequential keys within the JSON. + type: type[T] + The type to check the value against and return it as. + + Returns + ------- + T: + The found value as the type of the type parameter. + + Raises + ------ + JsonError + Raised if an error occurs while searching for or validating the value. 
+ """ + target = entry + + for index, key in enumerate(keys): + if not isinstance(target, dict): + raise JsonError(f"Expect the value .{'.'.join(keys[:index])} to be a dict.") + if key not in target: + raise JsonError(f"JSON key '{key}' not found in .{'.'.join(keys[:index])}.") + target = target[key] + + if isinstance(target, type_): + return target + + raise JsonError(f"Expect the value .{'.'.join(keys)} to be of type '{type_}'.") diff --git a/tests/e2e/defaults/disable_repo_finder.ini b/tests/e2e/defaults/disable_repo_finder.ini new file mode 100644 index 000000000..ec4fd9216 --- /dev/null +++ b/tests/e2e/defaults/disable_repo_finder.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +[repofinder] +find_repos = False From 2e4678981b3f3a5ecf9bb397035f5802454a71cf Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 22 Mar 2024 09:16:42 +1000 Subject: [PATCH 25/25] chore: add integration test expected result. 
Signed-off-by: Ben Selwyn-Smith --- .../purl/npm/semver/semver.json | 334 ++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 tests/e2e/expected_results/purl/npm/semver/semver.json diff --git a/tests/e2e/expected_results/purl/npm/semver/semver.json b/tests/e2e/expected_results/purl/npm/semver/semver.json new file mode 100644 index 000000000..9fa549cba --- /dev/null +++ b/tests/e2e/expected_results/purl/npm/semver/semver.json @@ -0,0 +1,334 @@ +{ + "metadata": { + "timestamps": "2024-03-22 09:02:56", + "has_passing_check": true, + "run_checks": [ + "mcn_provenance_available_1", + "mcn_provenance_expectation_1", + "mcn_provenance_witness_level_one_1", + "mcn_trusted_builder_level_three_1", + "mcn_build_as_code_1", + "mcn_build_script_1", + "mcn_build_service_1", + "mcn_infer_artifact_pipeline_1", + "mcn_provenance_level_three_1", + "mcn_version_control_system_1" + ], + "check_tree": { + "mcn_provenance_available_1": { + "mcn_provenance_level_three_1": {}, + "mcn_provenance_expectation_1": {}, + "mcn_provenance_witness_level_one_1": {} + }, + "mcn_version_control_system_1": { + "mcn_trusted_builder_level_three_1": { + "mcn_build_as_code_1": { + "mcn_build_service_1": { + "mcn_build_script_1": {} + }, + "mcn_infer_artifact_pipeline_1": {} + } + } + } + } + }, + "target": { + "info": { + "full_name": "pkg:npm/semver@7.6.0", + "local_cloned_path": "git_repos/github_com/npm/node-semver", + "remote_path": "https://github.com/npm/node-semver", + "branch": null, + "commit_hash": "377f709718053a477ed717089c4403c4fec332a1", + "commit_date": "2024-02-05T09:03:38-08:00" + }, + "provenances": { + "is_inferred": false, + "content": { + "github_actions": [ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v0.2", + "predicate": { + "builder": { + "id": "" + }, + "buildType": "", + "invocation": { + "configSource": { + "uri": "", + "digest": { + "sha1": "" + }, + "entryPoint": "" + }, + 
"parameters": {}, + "environment": {} + }, + "buildConfig": { + "jobID": "", + "stepID": "" + }, + "metadata": { + "buildInvocationId": "", + "buildStartedOn": "", + "buildFinishedOn": "", + "completeness": { + "parameters": "false", + "environment": "false", + "materials": "false" + }, + "reproducible": "false" + }, + "materials": [ + { + "uri": "", + "digest": {} + } + ] + } + } + ], + "npm Registry": [ + { + "_type": "https://in-toto.io/Statement/v1", + "subject": [ + { + "name": "pkg:npm/semver@7.6.0", + "digest": { + "sha512": "127c1786b9705cc93d80abb9fdf971e6cbff6a7e7b024469946de14caebc5bb1510cdfa4f8e5818fae4cefbd7d3a403cd972c1c6b717d0a4878fe5f908e84e56" + } + } + ], + "predicateType": "https://slsa.dev/provenance/v1", + "predicate": { + "buildDefinition": { + "buildType": "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1", + "externalParameters": { + "workflow": { + "ref": "refs/heads/main", + "repository": "https://github.com/npm/node-semver", + "path": ".github/workflows/release.yml" + } + }, + "internalParameters": { + "github": { + "event_name": "push", + "repository_id": "1357199", + "repository_owner_id": "6078720" + } + }, + "resolvedDependencies": [ + { + "uri": "git+https://github.com/npm/node-semver@refs/heads/main", + "digest": { + "gitCommit": "377f709718053a477ed717089c4403c4fec332a1" + } + } + ] + }, + "runDetails": { + "builder": { + "id": "https://github.com/actions/runner/github-hosted" + }, + "metadata": { + "invocationId": "https://github.com/npm/node-semver/actions/runs/7788106733/attempts/1" + } + } + } + } + ] + } + }, + "checks": { + "summary": { + "DISABLED": 0, + "FAILED": 4, + "PASSED": 5, + "SKIPPED": 0, + "UNKNOWN": 1 + }, + "results": [ + { + "check_id": "mcn_provenance_expectation_1", + "check_description": "Check whether the SLSA provenance for the produced artifact conforms to the expected value.", + "slsa_requirements": [ + "Provenance conforms with expectations - SLSA Level 3" + ], + "justification": [ 
+ "Not Available." + ], + "result_type": "UNKNOWN" + }, + { + "check_id": "mcn_build_as_code_1", + "check_description": "The build definition and configuration executed by the build service is verifiably derived from text file definitions stored in a version control system.", + "slsa_requirements": [ + "Build as code - SLSA Level 3" + ], + "justification": [ + "build_tool_name: npm", + "ci_service_name: github_actions", + "deploy_command: [\"npm\", \"publish\", \"--provenance\", \"--tag=\\\"$1\\\"\"]", + { + "build_trigger": "https://github.com/npm/node-semver/blob/377f709718053a477ed717089c4403c4fec332a1/.github/workflows/release-integration.yml" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_build_script_1", + "check_description": "Check if the target repo has a valid build script.", + "slsa_requirements": [ + "Scripted Build - SLSA Level 1" + ], + "justification": [ + "Not Available." + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_build_service_1", + "check_description": "Check if the target repo has a valid build service.", + "slsa_requirements": [ + "Build service - SLSA Level 2" + ], + "justification": [ + "Not Available." 
+ ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_provenance_available_1", + "check_description": "Check whether the target has intoto provenance.", + "slsa_requirements": [ + "Provenance - Available - SLSA Level 1", + "Provenance content - Identifies build instructions - SLSA Level 1", + "Provenance content - Identifies artifacts - SLSA Level 1", + "Provenance content - Identifies builder - SLSA Level 1" + ], + "justification": [ + "asset_name: semver", + { + "asset_url": "https://registry.npmjs.org/-/npm/v1/attestations/semver@7.6.0" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_version_control_system_1", + "check_description": "Check whether the target repo uses a version control system.", + "slsa_requirements": [ + "Version controlled - SLSA Level 2" + ], + "justification": [ + { + "git_repo": "https://github.com/npm/node-semver" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_infer_artifact_pipeline_1", + "check_description": "Detects potential pipelines from which an artifact is published.", + "slsa_requirements": [ + "Build as code - SLSA Level 3" + ], + "justification": [ + "Not Available." + ], + "result_type": "FAILED" + }, + { + "check_id": "mcn_provenance_level_three_1", + "check_description": "Check whether the target has SLSA provenance level 3.", + "slsa_requirements": [ + "Provenance - Non falsifiable - SLSA Level 3", + "Provenance content - Includes all build parameters - SLSA Level 3", + "Provenance content - Identifies entry point - SLSA Level 3", + "Provenance content - Identifies source code - SLSA Level 2" + ], + "justification": [ + "Not Available." 
+ ], + "result_type": "FAILED" + }, + { + "check_id": "mcn_provenance_witness_level_one_1", + "check_description": "Check whether the target has a level-1 witness provenance.", + "slsa_requirements": [ + "Provenance - Available - SLSA Level 1", + "Provenance content - Identifies build instructions - SLSA Level 1", + "Provenance content - Identifies artifacts - SLSA Level 1", + "Provenance content - Identifies builder - SLSA Level 1" + ], + "justification": [ + "Not Available." + ], + "result_type": "FAILED" + }, + { + "check_id": "mcn_trusted_builder_level_three_1", + "check_description": "Check whether the target uses a trusted SLSA level 3 builder.", + "slsa_requirements": [ + "Hermetic - SLSA Level 4", + "Isolated - SLSA Level 3", + "Parameterless - SLSA Level 4", + "Ephemeral environment - SLSA Level 3" + ], + "justification": [ + "Not Available." + ], + "result_type": "FAILED" + } + ] + } + }, + "dependencies": { + "analyzed_deps": 0, + "unique_dep_repos": 0, + "checks_summary": [ + { + "check_id": "mcn_provenance_available_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_expectation_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_witness_level_one_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_trusted_builder_level_three_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_as_code_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_script_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_service_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_infer_artifact_pipeline_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_level_three_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_version_control_system_1", + "num_deps_pass": 0 + } + ], + "dep_status": [] + } +}