Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions scripts/dev_scripts/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,30 @@ $RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -rp https://github

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

# Run Macaron on slsa-verifier with a CUE expectation (-pe) and an explicitly provided
# provenance file (-pf), then compare the generated JSON report against the expected result.
echo -e "\n----------------------------------------------------------------------------------"
echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file"
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json
JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json
EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/slsa_verifier.ini
PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

# Same analysis as the explicit-provenance test, but run with the defaults file that puts
# github.com on the URL link hostname allowlist; the report is compared against the same
# expected JSON result.
echo -e "\n----------------------------------------------------------------------------------"
echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file as a URL link file"
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json
JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json
EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/allow_url_link_github.ini
# NOTE(review): this is the same .intoto.jsonl path used by the preceding test, although this
# test is described as using a URL link file -- confirm whether a .url fixture was intended here.
PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

echo -e "\n----------------------------------------------------------------------------------"
echo "urllib3/urllib3: Analyzing the repo path when automatic dependency resolution is skipped"
echo "and CUE file is provided as expectation."
Expand Down
4 changes: 4 additions & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,14 @@ entry_conf =
provenance_extensions =
intoto.jsonl
intoto.jsonl.gz
intoto.jsonl.url
intoto.jsonl.gz.url
# This is the acceptable maximum size (in bytes) to download an asset.
max_download_size = 70000000
# This is the timeout (in seconds) to run the SLSA verifier.
timeout = 120
# The hostnames from which a provenance may be downloaded when the provenance file is a URL link file.
url_link_hostname_allowlist =

# Witness provenance. See: https://github.com/testifysec/witness.
[provenance.witness]
Expand Down
108 changes: 83 additions & 25 deletions src/macaron/slsa_analyzer/provenance/loader.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,71 @@
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the loaders for SLSA provenances."""

import base64
import configparser
import gzip
import json
import zlib
from urllib.parse import urlparse

from macaron.config.defaults import defaults
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError
from macaron.util import JsonType
from macaron.util import JsonType, send_get_http_raw


def load_provenance_file(filepath: str) -> dict[str, JsonType]:
"""Load a provenance file and obtain the payload.
def _try_read_url_link_file(file_content: bytes) -> str | None:
parser = configparser.ConfigParser()
try:
parser.read_string(file_content.decode())
return parser.get("InternetShortcut", "url", fallback=None)
except (configparser.Error, UnicodeDecodeError):
return None

Inside a provenance file is a DSSE envelope containing a base64-encoded
provenance JSON payload. See: https://github.com/secure-systems-lab/dsse.
If the file is gzipped, it will be transparently decompressed.

Parameters
----------
filepath : str
Path to the provenance file.
# Download the content that a URL link file points to.
# The hostname parsed from `url` must be present in `url_link_hostname_allowlist`;
# otherwise, or when the URL has no hostname, the download fails, or the response
# status is not 200, a LoadIntotoAttestationError is raised.
# On success the raw response body is returned as bytes.
def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes:
hostname = urlparse(url).hostname
if hostname is None or hostname == "":
raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL")
# Exact-match membership test; no wildcard or sub-domain matching is performed here.
if hostname not in url_link_hostname_allowlist:
raise LoadIntotoAttestationError(
"Cannot resolve URL link file: target hostname '" + hostname + "' is not in allowed hostnames."
)

Returns
-------
dict[str, JsonType]
The provenance JSON payload.
# TODO download size limit?
# The request timeout is read from the "timeout" option of the "downloads" section (120 if unset).
timeout = defaults.getint("downloads", "timeout", fallback=120)
response = send_get_http_raw(url=url, headers=None, timeout=timeout)
if response is None:
raise LoadIntotoAttestationError("Cannot resolve URL link file: Failed to download file")
if response.status_code != 200:
raise LoadIntotoAttestationError(
"Cannot resolve URL link file: Failed to download file, error " + str(response.status_code)
)
return response.content


def _load_provenance_file_content(
file_content: bytes, url_link_hostname_allowlist: list[str], url_link_depth_limit: int = 5
) -> dict[str, JsonType]:
url_link_depth = 0
while url_link_depth <= url_link_depth_limit:
url = _try_read_url_link_file(file_content)
if url is None:
break
if url_link_depth == url_link_depth_limit:
raise LoadIntotoAttestationError("Cannot resolve URL link file: depth limit exceeded")
file_content = _download_url_file_content(url, url_link_hostname_allowlist)
url_link_depth = url_link_depth + 1

Raises
------
LoadIntotoAttestationError
If there is an error loading the provenance JSON payload.
"""
try:
try:
with gzip.open(filepath, mode="rt", encoding="utf-8") as file:
provenance = json.load(file)
decompressed_file_content = gzip.decompress(file_content)
provenance = json.loads(decompressed_file_content.decode())
except (gzip.BadGzipFile, EOFError, zlib.error):
with open(filepath, encoding="utf-8") as file:
provenance = json.load(file)
except (OSError, json.JSONDecodeError, TypeError) as error:
provenance = json.loads(file_content.decode())
except (json.JSONDecodeError, TypeError, UnicodeDecodeError) as error:
raise LoadIntotoAttestationError(
"Cannot deserialize the file content as JSON.",
) from error
Expand Down Expand Up @@ -71,6 +94,41 @@ def load_provenance_file(filepath: str) -> dict[str, JsonType]:
return json_payload


def load_provenance_file(filepath: str) -> dict[str, JsonType]:
    """Read a provenance file from disk and return its payload.

    A provenance file holds a DSSE envelope wrapping a base64-encoded
    provenance JSON payload. See: https://github.com/secure-systems-lab/dsse.
    Gzipped files are decompressed transparently.
    A URL file (Windows .url file format, i.e. an ini file with a "URL" field
    inside an "InternetShortcut" section) is downloaded transparently; the
    hostnames allowed for such downloads are read from the
    ``url_link_hostname_allowlist`` option of the ``slsa.verifier`` section.

    Parameters
    ----------
    filepath : str
        Path to the provenance file.

    Returns
    -------
    dict[str, JsonType]
        The provenance JSON payload.

    Raises
    ------
    LoadIntotoAttestationError
        If there is an error loading the provenance JSON payload.
    """
    try:
        with open(filepath, mode="rb") as provenance_file:
            raw_bytes = provenance_file.read()
        allowed_hostnames = defaults.get_list("slsa.verifier", "url_link_hostname_allowlist", fallback=[])
        return _load_provenance_file_content(raw_bytes, allowed_hostnames)
    except OSError as os_error:
        # Any OSError raised while reading or loading is reported as a load failure.
        raise LoadIntotoAttestationError("Cannot open file.") from os_error


def load_provenance_payload(filepath: str) -> InTotoPayload:
"""Load, verify, and construct an in-toto payload.

Expand Down
10 changes: 10 additions & 0 deletions tests/e2e/defaults/allow_url_link_github.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

[slsa.verifier]
url_link_hostname_allowlist =
github.com

[analysis.checks]
exclude = mcn_provenance_level_three_1
include = *
Loading