44"""This module contains methods for extracting repository and commit metadata from provenance files."""
55import logging
66
7- from macaron .errors import JsonError , ProvenanceError
7+ from macaron .errors import ProvenanceError
88from macaron .json_tools import json_extract
99from macaron .slsa_analyzer .provenance .intoto import InTotoPayload , InTotoV1Payload , InTotoV01Payload
1010from macaron .util import JsonType
1717SLSA_V1_DIGEST_SET_GIT_ALGORITHMS = ["sha1" , "gitCommit" ]
1818
1919
def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the passed provenance payload.

    The in-toto payload version and the statement's ``predicateType`` together select the
    format-specific extractor that is used.

    Parameters
    ----------
    payload: InTotoPayload
        The provenance payload to extract from.

    Returns
    -------
    tuple[str | None, str | None]
        The repository URL and the commit hash, as returned by the selected extractor.
        Either element is None when the extractor could not find it in the provenance.

    Raises
    ------
    ProvenanceError
        If the combination of in-toto payload version and predicate type is not supported.
    """
    predicate_type = payload.statement.get("predicateType")

    if isinstance(payload, InTotoV1Payload):
        if predicate_type == "https://slsa.dev/provenance/v1":
            return _extract_from_slsa_v1(payload)
    elif isinstance(payload, InTotoV01Payload):
        if predicate_type == "https://slsa.dev/provenance/v0.2":
            return _extract_from_slsa_v02(payload)
        if predicate_type == "https://slsa.dev/provenance/v0.1":
            return _extract_from_slsa_v01(payload)
        if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1":
            return _extract_from_witness_provenance(payload)

    # No extractor matched: missing data is reported as None by the extractors themselves,
    # so reaching this point means the payload/predicate combination is unsupported.
    msg = (
        f"Extraction from provenance not supported for versions: "
        f"predicate_type {predicate_type}, in-toto {str(type(payload))}."
    )
    logger.debug(msg)
    raise ProvenanceError(msg)
56+
57+
def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v01 provenance payload.

    Returns a ``(repo, commit)`` tuple. Either element is None when it cannot be found; both are
    None when the predicate, the material index, or the indexed material is missing or invalid.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    # The repository URL and commit are stored inside an entry in the list of predicate -> materials.
    # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry.
    list_index = json_extract(predicate, ["recipe", "definedInMaterial"], int)
    # Index 0 is a valid position (the first material), so only a missing value is rejected here.
    # NOTE(review): assumes json_extract signals "not found" with None -- confirm against json_tools.
    if list_index is None:
        return None, None

    material_list = json_extract(predicate, ["materials"], list)
    if not material_list:
        return None, None

    # Reject negative values as well: Python would otherwise silently index from the end of the list.
    if not 0 <= list_index < len(material_list):
        logger.debug("Material list index outside of material list bounds.")
        return None, None

    material = material_list[list_index]
    if not material or not isinstance(material, dict):
        logger.debug("Indexed material list entry is invalid.")
        return None, None

    repo = None
    uri = json_extract(material, ["uri"], str)
    if uri:
        repo = _clean_spdx(uri)

    digest_set = json_extract(material, ["digest"], dict)
    if not digest_set:
        return repo, None
    commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_GIT_ALGORITHMS)

    # The digest helper reports "no usable digest" as an empty string; normalise that to None.
    return repo, commit or None
9594
9695
def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v02 provenance payload.

    Returns a ``(repo, commit)`` tuple in which either element is None when it cannot be found.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    # Both values live under predicate -> invocation -> configSource.
    # See https://slsa.dev/spec/v0.2/provenance
    config_source = ["invocation", "configSource"]

    source_uri = json_extract(predicate, [*config_source, "uri"], str)
    repo = _clean_spdx(source_uri) if source_uri else None

    digests = json_extract(predicate, [*config_source, "digest"], dict)
    if not digests:
        return repo, None

    commit = _extract_commit_from_digest_set(digests, SLSA_V02_DIGEST_SET_GIT_ALGORITHMS)
    # An empty string from the digest helper means no usable digest was present.
    return repo, commit or None
117116
118117
def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v1 provenance payload.

    Returns a ``(repo, commit)`` tuple. Both elements are None when no repository can be found,
    because the commit is located by matching resolved dependencies against the repository URL.
    The commit alone is None when no matching dependency carries a usable git digest.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    build_def = json_extract(predicate, ["buildDefinition"], dict)
    if not build_def:
        return None, None

    build_type = json_extract(build_def, ["buildType"], str)
    if not build_type:
        return None, None

    # Extract the repository URL. Its location depends on the build type.
    repo = None
    if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1":
        repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str)
        if not repo:
            repo = json_extract(build_def, ["externalParameters", "configSource", "repository"], str)
    elif build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1":
        repo = json_extract(build_def, ["externalParameters", "workflow", "repository"], str)

    if not repo:
        logger.debug("Repo required to extract commit from SLSA v1.")
        return None, None

    # Extract the commit hash from the resolved dependency that refers to the repository.
    deps = json_extract(build_def, ["resolvedDependencies"], list)
    if not deps:
        return repo, None

    commit = None
    for dep in deps:
        if not isinstance(dep, dict):
            continue
        uri = json_extract(dep, ["uri"], str)
        if not uri:
            continue
        if _clean_spdx(uri) != repo:
            continue
        digest_set = json_extract(dep, ["digest"], dict)
        if not digest_set:
            continue
        found = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_GIT_ALGORITHMS)
        # Only record a usable digest, so that a later matching dependency without one
        # cannot overwrite a commit that was already found.
        if found:
            commit = found

    return repo, commit
158167
159-
160- def _extract_from_witness_provenance (payload : InTotoV01Payload ) -> tuple [str , str ]:
168+ def _extract_from_witness_provenance (payload : InTotoV01Payload ) -> tuple [str | None , str | None ]:
161169 """Extract the repository and commit metadata from the witness provenance file found at the passed path.
162170
163171 To successfully return the commit and repository URL, the payload must respectively contain a Git attestation, and
@@ -175,11 +183,15 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
175183 """
176184 predicate : dict [str , JsonType ] | None = payload .statement .get ("predicate" )
177185 if not predicate :
178- raise ProvenanceError ("No predicate in payload statement." )
186+ logger .debug ("No predicate in payload statement." )
187+ return None , None
179188
180189 attestations = json_extract (predicate , ["attestations" ], list )
181- commit = ""
182- repo = ""
190+ if not attestations :
191+ return None , None
192+
193+ repo = None
194+ commit = None
183195 for entry in attestations :
184196 if not isinstance (entry , dict ):
185197 continue
@@ -193,10 +205,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
193205 ):
194206 repo = json_extract (entry , ["attestation" , "projecturl" ], str )
195207
196- if not commit or not repo :
197- raise ProvenanceError ("Could not extract repo and commit from provenance." )
198-
199- return repo , commit
208+ return repo or None , commit or None
200209
201210
202211def _extract_commit_from_digest_set (digest_set : dict [str , JsonType ], valid_algorithms : list [str ]) -> str :
@@ -212,7 +221,8 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor
212221 value = digest_set .get (key )
213222 if isinstance (value , str ):
214223 return value
215- raise ProvenanceError (f"No valid digest in digest set: { digest_set .keys ()} not in { valid_algorithms } " )
224+ logger .debug ("No valid digest in digest set: %s not in %s" , digest_set .keys (), valid_algorithms )
225+ return ""
216226
217227
218228def _clean_spdx (uri : str ) -> str :
0 commit comments