diff --git a/scripts/dev_scripts/integration_tests.sh b/scripts/dev_scripts/integration_tests.sh index e7fef2cbf..c0e3f060d 100755 --- a/scripts/dev_scripts/integration_tests.sh +++ b/scripts/dev_scripts/integration_tests.sh @@ -699,6 +699,30 @@ $RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -rp https://github check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail +echo -e "\n----------------------------------------------------------------------------------" +echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file" +echo -e "----------------------------------------------------------------------------------\n" +JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json +JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json +EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue +DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/slsa_verifier.ini +PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl +$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail + +check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail + +echo -e "\n----------------------------------------------------------------------------------" +echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file as a URL link file" +echo -e "----------------------------------------------------------------------------------\n" +JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json 
+JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json +EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue +DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/allow_url_link_github.ini +PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl.url +$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail + +check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail + echo -e "\n----------------------------------------------------------------------------------" echo "urllib3/urllib3: Analyzing the repo path when automatic dependency resolution is skipped" echo "and CUE file is provided as expectation." diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 9a63e8edb..ce849dfe8 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -468,10 +468,14 @@ entry_conf = provenance_extensions = intoto.jsonl intoto.jsonl.gz + intoto.jsonl.url + intoto.jsonl.gz.url # This is the acceptable maximum size (in bytes) to download an asset. max_download_size = 70000000 # This is the timeout (in seconds) to run the SLSA verifier. timeout = 120 +# The allowed hostnames for URL file links for provenance download +url_link_hostname_allowlist = # Witness provenance. See: https://github.com/testifysec/witness. [provenance.witness] diff --git a/src/macaron/slsa_analyzer/provenance/loader.py b/src/macaron/slsa_analyzer/provenance/loader.py index 329daa9f0..29e95e8dd 100644 --- a/src/macaron/slsa_analyzer/provenance/loader.py +++ b/src/macaron/slsa_analyzer/provenance/loader.py @@ -1,48 +1,71 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the loaders for SLSA provenances.""" import base64 +import configparser import gzip import json import zlib +from urllib.parse import urlparse +from macaron.config.defaults import defaults from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError -from macaron.util import JsonType +from macaron.util import JsonType, send_get_http_raw -def load_provenance_file(filepath: str) -> dict[str, JsonType]: - """Load a provenance file and obtain the payload. +def _try_read_url_link_file(file_content: bytes) -> str | None: + parser = configparser.ConfigParser() + try: + parser.read_string(file_content.decode()) + return parser.get("InternetShortcut", "url", fallback=None) + except (configparser.Error, UnicodeDecodeError): + return None - Inside a provenance file is a DSSE envelope containing a base64-encoded - provenance JSON payload. See: https://github.com/secure-systems-lab/dsse. - If the file is gzipped, it will be transparently decompressed. - Parameters - ---------- - filepath : str - Path to the provenance file. +def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes: + hostname = urlparse(url).hostname + if hostname is None or hostname == "": + raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL") + if hostname not in url_link_hostname_allowlist: + raise LoadIntotoAttestationError( + "Cannot resolve URL link file: target hostname '" + hostname + "' is not in allowed hostnames." + ) - Returns - ------- - dict[str, JsonType] - The provenance JSON payload. + # TODO download size limit? 
+ timeout = defaults.getint("downloads", "timeout", fallback=120) + response = send_get_http_raw(url=url, headers=None, timeout=timeout) + if response is None: + raise LoadIntotoAttestationError("Cannot resolve URL link file: Failed to download file") + if response.status_code != 200: + raise LoadIntotoAttestationError( + "Cannot resolve URL link file: Failed to download file, error " + str(response.status_code) + ) + return response.content + + +def _load_provenance_file_content( + file_content: bytes, url_link_hostname_allowlist: list[str], url_link_depth_limit: int = 5 +) -> dict[str, JsonType]: + url_link_depth = 0 + while url_link_depth <= url_link_depth_limit: + url = _try_read_url_link_file(file_content) + if url is None: + break + if url_link_depth == url_link_depth_limit: + raise LoadIntotoAttestationError("Cannot resolve URL link file: depth limit exceeded") + file_content = _download_url_file_content(url, url_link_hostname_allowlist) + url_link_depth = url_link_depth + 1 - Raises - ------ - LoadIntotoAttestationError - If there is an error loading the provenance JSON payload. - """ try: try: - with gzip.open(filepath, mode="rt", encoding="utf-8") as file: - provenance = json.load(file) + decompressed_file_content = gzip.decompress(file_content) + provenance = json.loads(decompressed_file_content.decode()) except (gzip.BadGzipFile, EOFError, zlib.error): - with open(filepath, encoding="utf-8") as file: - provenance = json.load(file) - except (OSError, json.JSONDecodeError, TypeError) as error: + provenance = json.loads(file_content.decode()) + except (json.JSONDecodeError, TypeError, UnicodeDecodeError) as error: raise LoadIntotoAttestationError( "Cannot deserialize the file content as JSON.", ) from error @@ -71,6 +94,41 @@ def load_provenance_file(filepath: str) -> dict[str, JsonType]: return json_payload +def load_provenance_file(filepath: str) -> dict[str, JsonType]: + """Load a provenance file and obtain the payload. 
+ + Inside a provenance file is a DSSE envelope containing a base64-encoded + provenance JSON payload. See: https://github.com/secure-systems-lab/dsse. + If the file is gzipped, it will be transparently decompressed. + If the file is a URL file (Windows .url file format, i.e. an ini file with + a "URL" field inside an "InternetShortcut" section), it will be transparently + downloaded. + + Parameters + ---------- + filepath : str + Path to the provenance file. + + Returns + ------- + dict[str, JsonType] + The provenance JSON payload. + + Raises + ------ + LoadIntotoAttestationError + If there is an error loading the provenance JSON payload. + """ + try: + with open(filepath, mode="rb") as file: + file_content = file.read() + return _load_provenance_file_content( + file_content, defaults.get_list("slsa.verifier", "url_link_hostname_allowlist", fallback=[]) + ) + except OSError as error: + raise LoadIntotoAttestationError("Cannot open file.") from error + + def load_provenance_payload(filepath: str) -> InTotoPayload: """Load, verify, and construct an in-toto payload. diff --git a/tests/e2e/defaults/allow_url_link_github.ini b/tests/e2e/defaults/allow_url_link_github.ini new file mode 100644 index 000000000..67ccdeb36 --- /dev/null +++ b/tests/e2e/defaults/allow_url_link_github.ini @@ -0,0 +1,10 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +[slsa.verifier] +url_link_hostname_allowlist = + github.com + +[analysis.checks] +exclude = mcn_provenance_level_three_1 +include = * diff --git a/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json b/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json new file mode 100644 index 000000000..d88f06d20 --- /dev/null +++ b/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json @@ -0,0 +1,273 @@ +{ + "metadata": { + "timestamps": "2024-04-18 13:30:07", + "has_passing_check": true, + "run_checks": [ + "mcn_provenance_expectation_1", + "mcn_build_as_code_1", + "mcn_infer_artifact_pipeline_1", + "mcn_provenance_witness_level_one_1", + "mcn_provenance_available_1", + "mcn_build_service_1", + "mcn_trusted_builder_level_three_1", + "mcn_build_script_1", + "mcn_version_control_system_1" + ], + "check_tree": { + "mcn_provenance_available_1": { + "mcn_provenance_level_three_1": {}, + "mcn_provenance_expectation_1": {}, + "mcn_provenance_witness_level_one_1": {} + }, + "mcn_version_control_system_1": { + "mcn_build_script_1": {}, + "mcn_trusted_builder_level_three_1": { + "mcn_build_as_code_1": { + "mcn_infer_artifact_pipeline_1": {}, + "mcn_build_service_1": {} + } + } + } + } + }, + "target": { + "info": { + "full_name": "pkg:github.com/slsa-framework/slsa-verifier@fc50b662fcfeeeb0e97243554b47d9b20b14efac", + "local_cloned_path": "git_repos/github_com/slsa-framework/slsa-verifier", + "remote_path": "https://github.com/slsa-framework/slsa-verifier", + "branch": "main", + "commit_hash": "fc50b662fcfeeeb0e97243554b47d9b20b14efac", + "commit_date": "2022-10-04T01:00:02+00:00" + }, + "provenances": { + "is_inferred": true, + "content": { + "github_actions": [ + { + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [], + "predicateType": "https://slsa.dev/provenance/v0.2", + "predicate": { + "builder": { + "id": 
"https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + }, + "buildType": "Custom github_actions", + "invocation": { + "configSource": { + "uri": "https://github.com/slsa-framework/slsa-verifier@refs/heads/main", + "digest": { + "sha1": "fc50b662fcfeeeb0e97243554b47d9b20b14efac" + }, + "entryPoint": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + }, + "parameters": {}, + "environment": {} + }, + "buildConfig": { + "jobID": "", + "stepID": "", + "stepName": "" + }, + "metadata": { + "buildInvocationId": "", + "buildStartedOn": "", + "buildFinishedOn": "", + "completeness": { + "parameters": "false", + "environment": "false", + "materials": "false" + }, + "reproducible": "false" + }, + "materials": [ + { + "uri": "", + "digest": {} + } + ] + } + } + ], + "npm Registry": [] + } + }, + "checks": { + "summary": { + "DISABLED": 0, + "FAILED": 2, + "PASSED": 7, + "SKIPPED": 0, + "UNKNOWN": 0 + }, + "results": [ + { + "check_id": "mcn_build_as_code_1", + "check_description": "The build definition and configuration executed by the build service is verifiably derived from text file definitions stored in a version control system.", + "slsa_requirements": [ + "Build as code - SLSA Level 3" + ], + "justification": [ + "Not Available." 
+ ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_build_script_1", + "check_description": "Check if the target repo has a valid build script.", + "slsa_requirements": [ + "Scripted Build - SLSA Level 1" + ], + "justification": [ + "build_tool_name: go", + "ci_service_name: github_actions", + "language: BuildLanguage.GO", + "build_tool_command: [\"go\", \"build\", \"-mod=vendor\", \"-o\", \"service\", \"./cli/experimental/service/\"]", + { + "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/pre-submit.cli.yml" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_build_service_1", + "check_description": "Check if the target repo has a valid build service.", + "slsa_requirements": [ + "Build service - SLSA Level 2" + ], + "justification": [ + "Not Available." + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_provenance_available_1", + "check_description": "Check whether the target has intoto provenance.", + "slsa_requirements": [ + "Provenance - Available - SLSA Level 1", + "Provenance content - Identifies build instructions - SLSA Level 1", + "Provenance content - Identifies artifacts - SLSA Level 1", + "Provenance content - Identifies builder - SLSA Level 1" + ], + "justification": [ + "Not Available." + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_provenance_expectation_1", + "check_description": "Check whether the SLSA provenance for the produced artifact conforms to the expected value.", + "slsa_requirements": [ + "Provenance conforms with expectations - SLSA Level 3" + ], + "justification": [ + "Not Available." 
+ ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_trusted_builder_level_three_1", + "check_description": "Check whether the target uses a trusted SLSA level 3 builder.", + "slsa_requirements": [ + "Hermetic - SLSA Level 4", + "Isolated - SLSA Level 3", + "Parameterless - SLSA Level 4", + "Ephemeral environment - SLSA Level 3" + ], + "justification": [ + "build_tool_name: slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@v1.2.0", + "ci_service_name: github_actions", + { + "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_version_control_system_1", + "check_description": "Check whether the target repo uses a version control system.", + "slsa_requirements": [ + "Version controlled - SLSA Level 2" + ], + "justification": [ + { + "git_repo": "https://github.com/slsa-framework/slsa-verifier" + } + ], + "result_type": "PASSED" + }, + { + "check_id": "mcn_infer_artifact_pipeline_1", + "check_description": "Detects potential pipelines from which an artifact is published.", + "slsa_requirements": [ + "Build as code - SLSA Level 3" + ], + "justification": [ + "Not Available." + ], + "result_type": "FAILED" + }, + { + "check_id": "mcn_provenance_witness_level_one_1", + "check_description": "Check whether the target has a level-1 witness provenance.", + "slsa_requirements": [ + "Provenance - Available - SLSA Level 1", + "Provenance content - Identifies build instructions - SLSA Level 1", + "Provenance content - Identifies artifacts - SLSA Level 1", + "Provenance content - Identifies builder - SLSA Level 1" + ], + "justification": [ + "Not Available." 
+ ], + "result_type": "FAILED" + } + ] + } + }, + "dependencies": { + "analyzed_deps": 0, + "unique_dep_repos": 0, + "checks_summary": [ + { + "check_id": "mcn_provenance_available_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_as_code_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_witness_level_one_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_trusted_builder_level_three_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_service_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_expectation_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_infer_artifact_pipeline_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_build_script_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_provenance_level_three_1", + "num_deps_pass": 0 + }, + { + "check_id": "mcn_version_control_system_1", + "num_deps_pass": 0 + } + ], + "dep_status": [] + } +} diff --git a/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl.url b/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl.url new file mode 100644 index 000000000..68be68c25 --- /dev/null +++ b/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl.url @@ -0,0 +1,2 @@ +[InternetShortcut] +URL=https://github.com/slsa-framework/slsa-verifier/releases/download/v2.5.1/slsa-verifier-linux-amd64.intoto.jsonl