Skip to content

Commit 3e1e075

Browse files
authored
feat: improve VSA generation with digest for each subject (#685)
Signed-off-by: Nathan Nguyen <[email protected]>
1 parent 0b82e4f commit 3e1e075

File tree

13 files changed

+661
-202
lines changed

13 files changed

+661
-202
lines changed

src/macaron/artifact/maven.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module declares types and utilities for Maven artifacts."""
5+
6+
from packageurl import PackageURL
7+
8+
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
9+
from macaron.slsa_analyzer.provenance.intoto.v01 import InTotoV01Subject
10+
from macaron.slsa_analyzer.provenance.intoto.v1 import InTotoV1ResourceDescriptor
11+
from macaron.slsa_analyzer.provenance.witness import (
12+
extract_build_artifacts_from_witness_subjects,
13+
is_witness_provenance_payload,
14+
load_witness_verifier_config,
15+
)
16+
17+
18+
class MavenSubjectPURLMatcher:
    """Match a PURL identifying a Maven artifact against the subjects of a provenance."""

    @staticmethod
    def get_subject_in_provenance_matching_purl(
        provenance_payload: InTotoPayload, purl: PackageURL
    ) -> InTotoV01Subject | InTotoV1ResourceDescriptor | None:
        """Find the provenance subject that corresponds to the given Maven PURL.

        The provenance is assumed to be built from a Java project, so a matching
        subject must be a Maven artifact.

        Parameters
        ----------
        provenance_payload : InTotoPayload
            The provenance payload.
        purl : PackageURL
            The PackageURL identifying the matching subject.

        Returns
        -------
        InTotoV01Subject | InTotoV1ResourceDescriptor | None
            The first subject whose derived PURL equals ``purl``. ``None`` is returned
            when the PURL lacks a namespace or a version, is not of type ``maven``,
            the payload is not accepted as a Witness provenance payload, or no
            subject matches.
        """
        # Only fully specified Maven PURLs can be compared with a subject.
        if not purl.namespace or not purl.version or purl.type != "maven":
            return None

        accepted_predicate_types = load_witness_verifier_config().predicate_types
        if not is_witness_provenance_payload(
            payload=provenance_payload,
            predicate_types=accepted_predicate_types,
        ):
            return None

        for candidate in extract_build_artifacts_from_witness_subjects(provenance_payload):
            # Keep only the last path component of the subject name.
            filename = candidate["name"].rpartition("/")[2]
            candidate_purl = create_maven_purl_from_artifact_filename(
                artifact_filename=filename,
                group_id=purl.namespace,
                version=purl.version,
            )
            if candidate_purl == purl:
                return candidate

        return None
68+
69+
def create_maven_purl_from_artifact_filename(
    artifact_filename: str,
    group_id: str,
    version: str,
) -> PackageURL | None:
    """Create a Maven PackageURL given an artifact filename, a group id, and a version.

    For reference, see:
    - https://maven.apache.org/ref/3.9.6/maven-core/artifact-handlers.html
    - https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst#maven
    Notes:
    - For the time being, we are only supporting the ``"type"`` qualifier, although the
    Maven section in the PackageURL docs also mentions the ``"classifier"`` qualifier.
    This is because not all artifact types have a unique value of ``"classifier"``
    according to the Artifact Handlers table in the Maven Core reference. In addition,
    not supporting the ``"classifier"`` qualifier at the moment simplifies the
    implementation for PURL decoding and generation until there is a concrete use
    case for this additional qualifier.
    - We are supporting only 4 artifact types: jar, pom, javadoc, and java-source.

    Parameters
    ----------
    artifact_filename : str
        The filename of the artifact.
    group_id : str
        The group id of the artifact.
    version : str
        The version of the artifact.

    Returns
    -------
    PackageURL | None
        A Maven artifact PackageURL, or `None` if the filename does not follow any
        of the supported artifact name patterns (including the case where the
        artifact id part of the filename is empty).
    """
    # Each artifact name should follow the pattern "<artifact-id><suffix>"
    # where "<suffix>" is one of the following.
    suffix_to_purl_qualifiers = {
        f"-{version}.jar": {"type": "jar"},
        f"-{version}.pom": {"type": "pom"},
        f"-{version}-javadoc.jar": {"type": "javadoc"},
        f"-{version}-sources.jar": {"type": "java-source"},
    }

    for suffix, purl_qualifiers in suffix_to_purl_qualifiers.items():
        if not artifact_filename.endswith(suffix):
            continue
        artifact_id = artifact_filename[: -len(suffix)]
        if not artifact_id:
            # The filename is nothing but the suffix, so there is no artifact id.
            # A PackageURL cannot be built without a name; treat this as a
            # non-matching pattern instead of letting the constructor raise.
            continue
        return PackageURL(
            type="maven",
            namespace=group_id,
            name=artifact_id,
            version=version,
            qualifiers=purl_qualifiers,
        )

    return None

src/macaron/database/table_definitions.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import string
1616
from datetime import datetime
1717
from pathlib import Path
18-
from typing import Any
18+
from typing import Any, Self
1919

2020
from packageurl import PackageURL
2121
from sqlalchemy import (
@@ -32,9 +32,11 @@
3232
)
3333
from sqlalchemy.orm import Mapped, mapped_column, relationship
3434

35+
from macaron.artifact.maven import MavenSubjectPURLMatcher
3536
from macaron.database.database_manager import ORMBase
3637
from macaron.database.rfc3339_datetime import RFC3339DateTime
3738
from macaron.errors import InvalidPURLError
39+
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher
3840
from macaron.slsa_analyzer.slsa_req import ReqName
3941

4042
logger: logging.Logger = logging.getLogger(__name__)
@@ -168,6 +170,13 @@ class Component(PackageURLMixin, ORMBase):
168170
secondaryjoin=components_association_table.c.child_component == id,
169171
)
170172

173+
#: The optional one-to-one relationship with a provenance subject in case this
174+
#: component represents a subject in a provenance.
175+
provenance_subject: Mapped["ProvenanceSubject | None"] = relationship(
176+
back_populates="component",
177+
lazy="immediate",
178+
)
179+
171180
def __init__(self, purl: str, analysis: Analysis, repository: "Repository | None"):
172181
"""
173182
Instantiate the software component using PURL identifier.
@@ -528,3 +537,71 @@ class HashDigest(ORMBase):
528537

529538
#: The many-to-one relationship with artifacts.
530539
artifact: Mapped["ReleaseArtifact"] = relationship(back_populates="digests", lazy="immediate")
540+
541+
542+
class ProvenanceSubject(ORMBase):
    """A subject in a provenance that matches the user-provided PackageURL.

    This subject may be later populated in VSAs during policy verification.
    """

    __tablename__ = "_provenance_subject"

    #: The primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)  # noqa: A003

    #: The component id of the provenance subject.
    component_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("_component.id"),
        nullable=False,
    )

    #: The required one-to-one relationship with a component.
    component: Mapped[Component] = relationship(
        back_populates="provenance_subject",
        lazy="immediate",
    )

    #: The SHA256 hash of the subject.
    sha256: Mapped[str] = mapped_column(String, nullable=False)

    @classmethod
    def from_purl_and_provenance(
        cls,
        purl: PackageURL,
        provenance_payload: InTotoPayload,
    ) -> Self | None:
        """Create a ``ProvenanceSubject`` entry if there is a provenance subject matching the PURL.

        Each registered subject matcher is tried in order. The first matcher that
        finds a subject decides the outcome; matchers that find no subject are skipped.

        Parameters
        ----------
        purl : PackageURL
            The PackageURL identifying the software component being analyzed.
        provenance_payload : InTotoPayload
            The provenance payload.

        Returns
        -------
        Self | None
            A ``ProvenanceSubject`` entry with the SHA256 digest of the provenance subject
            matching the given PURL, or ``None`` if no matcher finds a subject or the
            matched subject has no SHA256 digest. Only ``sha256`` is set on the
            returned entry.
        """
        subject_artifact_types: list[ProvenanceSubjectPURLMatcher] = [MavenSubjectPURLMatcher]

        for subject_artifact_type in subject_artifact_types:
            subject = subject_artifact_type.get_subject_in_provenance_matching_purl(
                provenance_payload,
                purl,
            )
            if subject is None:
                # This matcher does not recognise the PURL/provenance pair; give the
                # remaining matchers a chance instead of giving up immediately.
                continue

            digest = subject["digest"]
            if digest is None:
                return None
            sha256 = digest.get("sha256")
            if not sha256:
                return None
            return cls(sha256=sha256)

        return None

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from macaron.config.global_config import global_config
2020
from macaron.config.target_config import Configuration
2121
from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session
22-
from macaron.database.table_definitions import Analysis, Component, Repository
22+
from macaron.database.table_definitions import Analysis, Component, ProvenanceSubject, Repository
2323
from macaron.dependency_analyzer import DependencyAnalyzer, DependencyInfo
2424
from macaron.errors import (
2525
CloneError,
@@ -332,7 +332,12 @@ def run_single(
332332
# Create the component.
333333
component = None
334334
try:
335-
component = self.add_component(analysis, analysis_target, existing_records)
335+
component = self.add_component(
336+
analysis,
337+
analysis_target,
338+
existing_records,
339+
provenance_payload,
340+
)
336341
except PURLNotFoundError as error:
337342
logger.error(error)
338343
return Record(
@@ -484,6 +489,7 @@ def add_component(
484489
analysis: Analysis,
485490
analysis_target: AnalysisTarget,
486491
existing_records: dict[str, Record] | None = None,
492+
provenance_payload: InTotoPayload | None = None,
487493
) -> Component:
488494
"""Add a software component if it does not exist in the DB already.
489495
@@ -547,18 +553,30 @@ def add_component(
547553
raise PURLNotFoundError(
548554
f"The repository {analysis_target.repo_path} is not available and no PURL is provided from the user."
549555
)
550-
551-
repo_snapshot_purl = PackageURL(
556+
purl = PackageURL(
552557
type=repository.type,
553558
namespace=repository.owner,
554559
name=repository.name,
555560
version=repository.commit_sha,
556561
)
557-
return Component(purl=str(repo_snapshot_purl), analysis=analysis, repository=repository)
562+
else:
563+
# If the PURL is available, we always create the software component with it whether the repository is
564+
# available or not.
565+
purl = analysis_target.parsed_purl
566+
567+
component = Component(
568+
purl=str(purl),
569+
analysis=analysis,
570+
repository=repository,
571+
)
572+
573+
if provenance_payload:
574+
component.provenance_subject = ProvenanceSubject.from_purl_and_provenance(
575+
purl=purl,
576+
provenance_payload=provenance_payload,
577+
)
558578

559-
# If the PURL is available, we always create the software component with it whether the repository is
560-
# available or not.
561-
return Component(purl=str(analysis_target.parsed_purl), analysis=analysis, repository=repository)
579+
return component
562580

563581
@staticmethod
564582
def parse_purl(config: Configuration) -> PackageURL | None:

src/macaron/slsa_analyzer/checks/provenance_available_check.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ProvenanceAvailableFacts(CheckFacts):
5757
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
5858

5959
#: The provenance asset name.
60-
asset_name: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT})
60+
asset_name: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.TEXT})
6161

6262
#: The URL for the provenance asset.
6363
asset_url: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.HREF})
@@ -504,6 +504,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
504504
CheckResultData
505505
The result of the check.
506506
"""
507+
if ctx.dynamic_data["provenance"]:
508+
return CheckResultData(
509+
result_tables=[ProvenanceAvailableFacts(confidence=Confidence.HIGH)],
510+
result_type=CheckResultType.PASSED,
511+
)
512+
507513
provenance_extensions = defaults.get_list(
508514
"slsa.verifier",
509515
"provenance_extensions",

src/macaron/slsa_analyzer/checks/provenance_l3_content_check.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
5858
logger.info("%s check was unable to find any expectations.", self.check_info.check_id)
5959
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)
6060

61+
if ctx.dynamic_data["provenance"]:
62+
if expectation.validate(ctx.dynamic_data["provenance"]):
63+
return CheckResultData(
64+
result_tables=[expectation],
65+
result_type=CheckResultType.PASSED,
66+
)
67+
return CheckResultData(
68+
result_tables=[expectation],
69+
result_type=CheckResultType.FAILED,
70+
)
71+
6172
package_registry_info_entries = ctx.dynamic_data["package_registries"]
6273
ci_services = ctx.dynamic_data["ci_services"]
6374

0 commit comments

Comments
 (0)