Skip to content

Commit 4a8cbaa

Browse files
authored
feat: allow provenance files to be files containing a URL pointing to the actual provenance file which will be transparently downloaded (#710)
The URL link file format is the Windows .url file format, i.e. an INI file containing a "URL" field inside an "InternetShortcut" section. When a URL link provenance file is processed, the content it points to is transparently downloaded, provided the hostname of the URL is configured as an allowed hostname in the defaults.ini file. Signed-off-by: Nicholas Allen <[email protected]>
1 parent e214326 commit 4a8cbaa

File tree

6 files changed

+396
-25
lines changed

6 files changed

+396
-25
lines changed

scripts/dev_scripts/integration_tests.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,30 @@ $RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -rp https://github
699699

700700
check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail
701701

702+
# Integration test: run the analysis with a provenance file passed explicitly via -pf
# and compare the generated JSON report against the expected result.
echo -e "\n----------------------------------------------------------------------------------"
echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file"
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=${WORKSPACE}/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json
JSON_RESULT=${WORKSPACE}/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json
EXPECTATION_FILE=${WORKSPACE}/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=${WORKSPACE}/tests/e2e/defaults/slsa_verifier.ini
PROVENANCE_FILE=${WORKSPACE}/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl
# Expansions are intentionally left unquoted, exactly as in the surrounding script.
${RUN_MACARON} -dp ${DEFAULTS_FILE} analyze \
    -pe ${EXPECTATION_FILE} \
    -pf ${PROVENANCE_FILE} \
    -rp https://github.com/slsa-framework/slsa-verifier \
    -b main \
    -d fc50b662fcfeeeb0e97243554b47d9b20b14efac \
    --skip-deps || log_fail

check_or_update_expected_output ${COMPARE_JSON_OUT} ${JSON_RESULT} ${JSON_EXPECTED} || log_fail
713+
714+
# Integration test: the provenance passed via -pf is a URL link file (Windows .url format)
# whose target is downloaded because github.com is allowed in the defaults file used here.
echo -e "\n----------------------------------------------------------------------------------"
echo "Test verifying CUE provenance expectation for slsa-verifier with explicitly-provided provenance file as a URL link file"
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json
JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/slsa-verifier.json
EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue
DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/allow_url_link_github.ini
# Must point at the .url link file; the plain .intoto.jsonl used before made this test
# identical to the previous one, so the URL link handling was never exercised.
# NOTE(review): assumes the link file is checked in next to the .jsonl resource - confirm the name.
PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl.url
$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail

check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail
725+
702726
echo -e "\n----------------------------------------------------------------------------------"
703727
echo "urllib3/urllib3: Analyzing the repo path when automatic dependency resolution is skipped"
704728
echo "and CUE file is provided as expectation."

src/macaron/config/defaults.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,10 +468,14 @@ entry_conf =
468468
provenance_extensions =
469469
intoto.jsonl
470470
intoto.jsonl.gz
471+
intoto.jsonl.url
472+
intoto.jsonl.gz.url
471473
# This is the acceptable maximum size (in bytes) to download an asset.
472474
max_download_size = 70000000
473475
# This is the timeout (in seconds) to run the SLSA verifier.
474476
timeout = 120
477+
# The allowed hostnames for URL file links for provenance download
478+
url_link_hostname_allowlist =
475479

476480
# Witness provenance. See: https://github.com/testifysec/witness.
477481
[provenance.witness]

src/macaron/slsa_analyzer/provenance/loader.py

Lines changed: 83 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,71 @@
1-
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains the loaders for SLSA provenances."""
55

66
import base64
7+
import configparser
78
import gzip
89
import json
910
import zlib
11+
from urllib.parse import urlparse
1012

13+
from macaron.config.defaults import defaults
1114
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload
1215
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError
13-
from macaron.util import JsonType
16+
from macaron.util import JsonType, send_get_http_raw
1417

1518

16-
def load_provenance_file(filepath: str) -> dict[str, JsonType]:
17-
"""Load a provenance file and obtain the payload.
19+
def _try_read_url_link_file(file_content: bytes) -> str | None:
20+
parser = configparser.ConfigParser()
21+
try:
22+
parser.read_string(file_content.decode())
23+
return parser.get("InternetShortcut", "url", fallback=None)
24+
except (configparser.Error, UnicodeDecodeError):
25+
return None
1826

19-
Inside a provenance file is a DSSE envelope containing a base64-encoded
20-
provenance JSON payload. See: https://github.com/secure-systems-lab/dsse.
21-
If the file is gzipped, it will be transparently decompressed.
2227

23-
Parameters
24-
----------
25-
filepath : str
26-
Path to the provenance file.
28+
def _download_url_file_content(url: str, url_link_hostname_allowlist: list[str]) -> bytes:
    """Download the content that a URL link file points to.

    Parameters
    ----------
    url : str
        The target URL read from the URL link file.
    url_link_hostname_allowlist : list[str]
        The hostnames that content is allowed to be downloaded from.

    Returns
    -------
    bytes
        The body of the HTTP response.

    Raises
    ------
    LoadIntotoAttestationError
        If the URL is malformed, does not use HTTP(S), targets a hostname that is not in
        the allowlist, or the download does not complete with status code 200.
    """
    try:
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname
    except ValueError as error:
        # urlparse rejects some malformed inputs (e.g. an unterminated IPv6 literal);
        # report them through the same error type as every other failure here.
        raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL") from error
    if not hostname:
        raise LoadIntotoAttestationError("Cannot resolve URL link file: invalid URL")
    # The URL comes from file content, so only plain web downloads are accepted.
    if parsed_url.scheme not in ("http", "https"):
        raise LoadIntotoAttestationError(
            f"Cannot resolve URL link file: unsupported URL scheme '{parsed_url.scheme}'."
        )
    # urlparse lowercases the hostname, so the configured entries are lowercased too.
    allowed_hostnames = {allowed.lower() for allowed in url_link_hostname_allowlist}
    if hostname not in allowed_hostnames:
        raise LoadIntotoAttestationError(
            f"Cannot resolve URL link file: target hostname '{hostname}' is not in allowed hostnames."
        )

    # TODO download size limit?
    timeout = defaults.getint("downloads", "timeout", fallback=120)
    response = send_get_http_raw(url=url, headers=None, timeout=timeout)
    if response is None:
        raise LoadIntotoAttestationError("Cannot resolve URL link file: Failed to download file")
    if response.status_code != 200:
        raise LoadIntotoAttestationError(
            f"Cannot resolve URL link file: Failed to download file, error {response.status_code}"
        )
    return response.content
47+
48+
49+
def _load_provenance_file_content(
50+
file_content: bytes, url_link_hostname_allowlist: list[str], url_link_depth_limit: int = 5
51+
) -> dict[str, JsonType]:
52+
url_link_depth = 0
53+
while url_link_depth <= url_link_depth_limit:
54+
url = _try_read_url_link_file(file_content)
55+
if url is None:
56+
break
57+
if url_link_depth == url_link_depth_limit:
58+
raise LoadIntotoAttestationError("Cannot resolve URL link file: depth limit exceeded")
59+
file_content = _download_url_file_content(url, url_link_hostname_allowlist)
60+
url_link_depth = url_link_depth + 1
3261

33-
Raises
34-
------
35-
LoadIntotoAttestationError
36-
If there is an error loading the provenance JSON payload.
37-
"""
3862
try:
3963
try:
40-
with gzip.open(filepath, mode="rt", encoding="utf-8") as file:
41-
provenance = json.load(file)
64+
decompressed_file_content = gzip.decompress(file_content)
65+
provenance = json.loads(decompressed_file_content.decode())
4266
except (gzip.BadGzipFile, EOFError, zlib.error):
43-
with open(filepath, encoding="utf-8") as file:
44-
provenance = json.load(file)
45-
except (OSError, json.JSONDecodeError, TypeError) as error:
67+
provenance = json.loads(file_content.decode())
68+
except (json.JSONDecodeError, TypeError, UnicodeDecodeError) as error:
4669
raise LoadIntotoAttestationError(
4770
"Cannot deserialize the file content as JSON.",
4871
) from error
@@ -71,6 +94,41 @@ def load_provenance_file(filepath: str) -> dict[str, JsonType]:
7194
return json_payload
7295

7396

97+
def load_provenance_file(filepath: str) -> dict[str, JsonType]:
    """Read a provenance file from disk and return its provenance payload.

    The file is expected to hold a DSSE envelope wrapping a base64-encoded
    provenance JSON payload. See: https://github.com/secure-systems-lab/dsse.
    Gzipped files are decompressed transparently.
    A URL link file (Windows .url file format, i.e. an ini file with a "URL"
    field inside an "InternetShortcut" section) is resolved by transparently
    downloading the content it points to.

    Parameters
    ----------
    filepath : str
        Path to the provenance file.

    Returns
    -------
    dict[str, JsonType]
        The provenance JSON payload.

    Raises
    ------
    LoadIntotoAttestationError
        If there is an error loading the provenance JSON payload.
    """
    try:
        # Read raw bytes: the content may be gzipped, plain JSON or a URL link file.
        with open(filepath, mode="rb") as provenance_file:
            raw_content = provenance_file.read()
        hostname_allowlist = defaults.get_list("slsa.verifier", "url_link_hostname_allowlist", fallback=[])
        # The call stays inside the try block, so an OSError raised while the content
        # is being processed is reported the same way as a failure to open the file.
        return _load_provenance_file_content(raw_content, hostname_allowlist)
    except OSError as error:
        raise LoadIntotoAttestationError("Cannot open file.") from error
130+
131+
74132
def load_provenance_payload(filepath: str) -> InTotoPayload:
75133
"""Load, verify, and construct an in-toto payload.
76134
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
[slsa.verifier]
5+
url_link_hostname_allowlist =
6+
github.com
7+
8+
[analysis.checks]
9+
exclude = mcn_provenance_level_three_1
10+
include = *

0 commit comments

Comments
 (0)