88from functools import partial
99
1010from packageurl import PackageURL
11+ from pydriller import Git
1112
1213from macaron .config .defaults import defaults
1314from macaron .repo_finder .commit_finder import AbstractPurlType , determine_abstract_purl_type
15+ from macaron .slsa_analyzer .analyze_context import AnalyzeContext
1416from macaron .slsa_analyzer .checks .provenance_available_check import ProvenanceAvailableException
17+ from macaron .slsa_analyzer .ci_service import GitHubActions
18+ from macaron .slsa_analyzer .ci_service .base_ci_service import NoneCIService
1519from macaron .slsa_analyzer .package_registry import PACKAGE_REGISTRIES , JFrogMavenRegistry , NPMRegistry
1620from macaron .slsa_analyzer .package_registry .npm_registry import NPMAttestationAsset
1721from macaron .slsa_analyzer .provenance .intoto import InTotoPayload
1822from macaron .slsa_analyzer .provenance .intoto .errors import LoadIntotoAttestationError
1923from macaron .slsa_analyzer .provenance .loader import load_provenance_payload
24+ from macaron .slsa_analyzer .provenance .slsa import SLSAProvenanceData
2025from macaron .slsa_analyzer .provenance .witness import is_witness_provenance_payload , load_witness_verifier_config
26+ from macaron .slsa_analyzer .specs .ci_spec import CIInfo
2127
2228logger : logging .Logger = logging .getLogger (__name__ )
2329
@@ -49,6 +55,8 @@ def find_provenance(self, purl: PackageURL) -> list[InTotoPayload]:
4955 list[InTotoPayload]
5056 The provenance payload, or an empty list if not found.
5157 """
58+ logger .debug ("Seeking provenance of: %s" , purl )
59+
5260 if determine_abstract_purl_type (purl ) == AbstractPurlType .REPOSITORY :
5361 # Do not perform default discovery for repository type targets.
5462 return []
@@ -331,7 +339,8 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
331339 logger .error (msg )
332340 raise ProvenanceAvailableException (msg )
333341
334- provenance_filepaths = []
342+ provenances = []
343+ witness_verifier_config = load_witness_verifier_config ()
335344 try :
336345 with tempfile .TemporaryDirectory () as temp_dir :
337346 for provenance_asset in provenance_assets :
@@ -342,28 +351,181 @@ def find_gav_provenance(purl: PackageURL, registry: JFrogMavenRegistry) -> list[
342351 provenance_asset .name ,
343352 )
344353 continue
345- provenance_filepaths .append (provenance_filepath )
346- except OSError as error :
347- logger .error ("Error while storing provenance in the temporary directory: %s" , error )
348-
349- provenances = []
350- witness_verifier_config = load_witness_verifier_config ()
351354
352- for provenance_filepath in provenance_filepaths :
353- try :
354- provenance_payload = load_provenance_payload (provenance_filepath )
355- except LoadIntotoAttestationError as error :
356- logger .error ("Error while loading provenance: %s" , error )
357- continue
355+ try :
356+ provenance_payload = load_provenance_payload (provenance_filepath )
357+ except LoadIntotoAttestationError as load_error :
358+ logger .error ("Error while loading provenance: %s" , load_error )
359+ continue
358360
359- if not is_witness_provenance_payload (provenance_payload , witness_verifier_config .predicate_types ):
360- continue
361+ if not is_witness_provenance_payload (provenance_payload , witness_verifier_config .predicate_types ):
362+ continue
361363
362- provenances .append (provenance_payload )
364+ provenances .append (provenance_payload )
365+ except OSError as error :
366+ logger .error ("Error while storing provenance in the temporary directory: %s" , error )
363367
364368 if not provenances :
365369 logger .debug ("No payloads found in provenance files." )
366370 return []
367371
368372 # We assume that there is only one provenance per GAV.
369373 return provenances [:1 ]
374+
375+
def find_provenance_from_ci(analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
    """Look for a provenance payload among the release assets of the repository's CI services.

    The CI service entries of the analysis context are visited in order. Entries
    backed by ``NoneCIService`` are skipped silently, and any other service that is
    not GitHub Actions is skipped with a debug message. The first GitHub Actions
    entry decides the result: the function returns from it whether or not a
    provenance was found, so later entries are never inspected.

    As a side effect, the matching release payload, the discovered provenance
    assets and the loaded provenance payloads are stored on that ``CIInfo`` entry.

    Parameters
    ----------
    analyze_ctx: AnalyzeContext
        The context of the ongoing analysis.
    git_obj: Git | None
        The Pydriller Git object representing the repository, if any. Without it
        no release tag can be resolved and no provenance is returned.

    Returns
    -------
    InTotoPayload | None
        The payload of the first loaded provenance, or None if not found.
    """
    provenance_extensions = defaults.get_list("slsa.verifier", "provenance_extensions", fallback=["intoto.jsonl"])
    component = analyze_ctx.component
    ci_info_entries = analyze_ctx.dynamic_data["ci_services"]

    repository = component.repository
    if not repository:
        logger.debug("Unable to find a provenance because a repository was not found for %s.", component.purl)
        return None

    repo_full_name = repository.full_name
    for ci_info in ci_info_entries:
        ci_service = ci_info["service"]

        if isinstance(ci_service, NoneCIService):
            continue

        if not isinstance(ci_service, GitHubActions):
            logger.debug("CI service not supported for provenance finding: %s", ci_service.name)
            continue

        # Resolve the release tag that points at the commit of the analyzed component.
        commit_digest = repository.commit_sha
        release_tag = None
        if git_obj:
            if not commit_digest:
                logger.debug("Cannot retrieve asset provenance without commit digest.")
                return None
            for candidate in git_obj.repo.tags:
                try:
                    candidate_commit = str(candidate.commit)
                except ValueError as error:
                    # The tag does not reference a commit, so it cannot match.
                    logger.debug("Commit of tag is a blob or tree: %s", error)
                else:
                    if candidate_commit and candidate_commit == commit_digest:
                        release_tag = str(candidate)
                        break

        if not release_tag:
            logger.debug("Could not find the tag matching commit: %s", commit_digest)
            return None

        # Look up the release published for that tag.
        release_payload = ci_service.api_client.get_release_by_tag(repo_full_name, release_tag)
        if not release_payload:
            logger.debug("Failed to find release matching tag: %s", release_tag)
            return None

        # Keep the release data so that other checks can reuse it.
        ci_info["release"] = release_payload

        # Collect the release assets for every configured provenance extension.
        for extension in provenance_extensions:
            found_assets = ci_service.api_client.fetch_assets(release_payload, ext=extension)
            if found_assets:
                logger.info("Found the following provenance assets:")
                for asset in found_assets:
                    logger.info("* %s", asset.url)

                # Keep the provenance assets so that other checks can reuse them.
                ci_info["provenance_assets"].extend(found_assets)

        # Download the collected assets and load their payloads into ``ci_info``.
        download_provenances_from_github_actions_ci_service(ci_info)

        # TODO consider how to handle multiple payloads here.
        loaded_provenances = ci_info["provenances"]
        if not loaded_provenances:
            return None
        return loaded_provenances[0].payload

    return None
477+
478+
def download_provenances_from_github_actions_ci_service(ci_info: CIInfo) -> None:
    """Download the provenance assets of a CI entry and load their payloads.

    Each asset listed under ``ci_info["provenance_assets"]`` is downloaded into a
    temporary directory and parsed. Assets that exceed the configured
    ``max_download_size``, fail to download, or cannot be loaded as an in-toto
    attestation are skipped. The successfully loaded provenances are then stored
    under ``ci_info["provenances"]``, replacing its previous value.

    An ``OSError`` raised while working with the temporary directory is logged
    and not propagated.

    Parameters
    ----------
    ci_info: CIInfo
        A ``CIInfo`` instance that is expected to hold a GitHub Actions CI service
        object, together with the provenance assets to download.
    """
    ci_service = ci_info["service"]
    prov_assets = ci_info["provenance_assets"]

    # The size limit does not depend on the asset, so read it from the configuration once.
    max_download_size = defaults.getint("slsa.verifier", "max_download_size", fallback=1000000)

    try:
        with tempfile.TemporaryDirectory() as temp_path:
            downloaded_provs = []
            for prov_asset in prov_assets:
                # Check the size before downloading.
                if prov_asset.size_in_bytes > max_download_size:
                    logger.info(
                        "Skip verifying the provenance %s: asset size too large.",
                        prov_asset.name,
                    )
                    continue

                provenance_filepath = os.path.join(temp_path, prov_asset.name)

                if not ci_service.api_client.download_asset(prov_asset.url, provenance_filepath):
                    logger.debug(
                        "Could not download the provenance %s. Skip verifying...",
                        prov_asset.name,
                    )
                    continue

                # Read the provenance while the downloaded file still exists.
                try:
                    payload = load_provenance_payload(provenance_filepath)
                except LoadIntotoAttestationError as error:
                    logger.error("Error loading provenance: %s", error)
                    continue

                # Keep the payload together with the asset it was loaded from.
                downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))

            # Persist the provenance payloads into the CIInfo object.
            ci_info["provenances"] = downloaded_provs
    except OSError as error:
        logger.error("Error while storing provenance in the temporary directory: %s", error)
0 commit comments