Skip to content

Commit 228a4b6

Browse files
committed
feat: improve vsa generation with digest for each subject
Signed-off-by: Nathan Nguyen <[email protected]>
1 parent f21bb6f commit 228a4b6

File tree

13 files changed

+760
-195
lines changed

13 files changed

+760
-195
lines changed

src/macaron/artifact/maven.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module declares types and utilities for Maven artifacts."""
5+
6+
import re
7+
from dataclasses import dataclass
8+
from enum import Enum
9+
from typing import NamedTuple, Self
10+
11+
from packageurl import PackageURL
12+
13+
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
14+
from macaron.slsa_analyzer.provenance.intoto.v01 import InTotoV01Subject
15+
from macaron.slsa_analyzer.provenance.intoto.v1 import InTotoV1ResourceDescriptor
16+
from macaron.slsa_analyzer.provenance.witness import (
17+
extract_build_artifacts_from_witness_subjects,
18+
is_witness_provenance_payload,
19+
load_witness_verifier_config,
20+
)
21+
22+
23+
class _MavenArtifactType(NamedTuple):
    """The value type carried by every :class:`MavenArtifactType` member."""

    #: File name template containing ``{artifact_id}`` and ``{version}`` placeholders.
    filename_pattern: str
    #: The PURL qualifiers associated with this kind of artifact.
    purl_qualifiers: dict[str, str]


class MavenArtifactType(_MavenArtifactType, Enum):
    """The kinds of Maven artifacts that Macaron supports.

    Every member exposes two fields: ``filename_pattern``, the template of the
    artifact file name, and ``purl_qualifiers``, the qualifiers a PackageURL
    carries for that kind of artifact.

    For reference, see:
    - https://maven.apache.org/ref/3.9.6/maven-core/artifact-handlers.html
    - https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst#maven
    """

    # Enum with custom value type.
    # See https://docs.python.org/3.10/library/enum.html#others.
    # The declaration order is significant: callers iterate over the members
    # and stop at the first one that matches.
    JAR = _MavenArtifactType("{artifact_id}-{version}.jar", {"type": "jar"})
    POM = _MavenArtifactType("{artifact_id}-{version}.pom", {"type": "pom"})
    JAVADOC = _MavenArtifactType("{artifact_id}-{version}-javadoc.jar", {"type": "javadoc"})
    JAVA_SOURCE = _MavenArtifactType("{artifact_id}-{version}-sources.jar", {"type": "sources"})
54+
55+
56+
@dataclass
class MavenArtifact:
    """A Maven artifact, identified by its coordinates and its artifact type."""

    #: The Maven group id (the PURL namespace).
    group_id: str
    #: The Maven artifact id (the PURL name).
    artifact_id: str
    #: The version of the artifact.
    version: str
    #: The kind of artifact (jar, pom, javadoc, sources).
    artifact_type: MavenArtifactType

    @property
    def package_url(self) -> PackageURL:
        """Get the PackageURL of this Maven artifact."""
        return PackageURL(
            type="maven",
            namespace=self.group_id,
            name=self.artifact_id,
            version=self.version,
            qualifiers=self.artifact_type.purl_qualifiers,
        )

    @classmethod
    def from_package_url(cls, package_url: PackageURL) -> Self | None:
        """Create a Maven artifact from a PackageURL.

        Parameters
        ----------
        package_url : PackageURL
            The PackageURL identifying the Maven artifact.

        Returns
        -------
        Self | None
            A Maven artifact, or ``None`` if the PURL is not a valid Maven artifact PURL, or if
            the artifact type is not supported.
            For supported artifact types, see :class:`MavenArtifactType`.
        """
        if package_url.type != "maven":
            return None
        if not package_url.namespace:
            return None
        if not package_url.version:
            return None

        # The artifact type is the one whose qualifiers are exactly those of the PURL.
        maven_artifact_type = None
        for artifact_type in MavenArtifactType:
            if artifact_type.purl_qualifiers == package_url.qualifiers:
                maven_artifact_type = artifact_type
                break
        if maven_artifact_type is None:
            return None

        return cls(
            group_id=package_url.namespace,
            artifact_id=package_url.name,
            version=package_url.version,
            artifact_type=maven_artifact_type,
        )

    @classmethod
    def from_artifact_name(
        cls,
        artifact_name: str,
        group_id: str,
        version: str,
    ) -> Self | None:
        """Create a Maven artifact from the file name of an artifact.

        The artifact type and the artifact id are derived by matching the file name
        against the ``filename_pattern`` of each :class:`MavenArtifactType`, in
        declaration order. The whole file name must match the pattern, so files that
        merely start like an artifact (for example ``foo-1.0.jar.asc``) are rejected.

        Parameters
        ----------
        artifact_name : str
            The file name of the artifact, without any directory component.
        group_id : str
            The group id to assign to the resulting artifact.
        version : str
            The version that is expected to appear in the file name.

        Returns
        -------
        Self | None
            A Maven artifact, or ``None`` if the file name does not follow the naming
            pattern of any supported artifact type.
            For supported artifact types, see :class:`MavenArtifactType`.
        """
        for maven_artifact_type in MavenArtifactType:
            # Escape the literal parts of the template (the version and the file
            # extension contain ``.``) so that they are matched verbatim, and only
            # the artifact id is a wildcard.
            prefix, _, suffix = maven_artifact_type.filename_pattern.partition("{artifact_id}")
            pattern = (
                re.escape(prefix.format(version=version))
                + "(.*)"
                + re.escape(suffix.format(version=version))
            )
            match_result = re.fullmatch(pattern, artifact_name)
            if not match_result:
                continue
            return cls(
                group_id=group_id,
                artifact_id=match_result.group(1),
                version=version,
                artifact_type=maven_artifact_type,
            )
        return None
139+
140+
141+
class MavenSubjectPURLMatcher:
    """A matcher that pairs a PURL identifying a Maven artifact with a provenance subject."""

    @staticmethod
    def get_subject_in_provenance_matching_purl(
        provenance_payload: InTotoPayload, purl: PackageURL
    ) -> InTotoV01Subject | InTotoV1ResourceDescriptor | None:
        """Find the provenance subject that corresponds to the given PURL.

        The provenance is assumed to come from the build of a Java project, so the
        subject must be a Maven artifact. Only Witness provenances are handled.

        Parameters
        ----------
        provenance_payload : InTotoPayload
            The provenance payload.
        purl : PackageURL
            The PackageURL identifying the matching subject.

        Returns
        -------
        InTotoV01Subject | InTotoV1ResourceDescriptor | None
            The first subject whose Maven artifact has a PackageURL equal to ``purl``,
            or ``None`` if the PURL is not a supported Maven PURL, the payload is not
            a Witness provenance, or no subject matches.
        """
        target = MavenArtifact.from_package_url(purl)
        if not target:
            return None

        is_witness = is_witness_provenance_payload(
            payload=provenance_payload,
            predicate_types=load_witness_verifier_config().predicate_types,
        )
        if not is_witness:
            return None

        # First turn every subject that looks like a Maven artifact into an
        # (artifact, subject) pair, then look for the pair matching the PURL.
        candidates = []
        for subject in extract_build_artifacts_from_witness_subjects(provenance_payload):
            # Only the last path segment of the subject name is the file name.
            file_name = subject["name"].rpartition("/")[2]
            candidate = MavenArtifact.from_artifact_name(
                artifact_name=file_name,
                group_id=target.group_id,
                version=target.version,
            )
            if candidate is not None:
                candidates.append((candidate, subject))

        return next(
            (subject for candidate, subject in candidates if candidate.package_url == purl),
            None,
        )

src/macaron/database/table_definitions.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import string
1616
from datetime import datetime
1717
from pathlib import Path
18-
from typing import Any
18+
from typing import Any, Self
1919

2020
from packageurl import PackageURL
2121
from sqlalchemy import (
@@ -32,9 +32,11 @@
3232
)
3333
from sqlalchemy.orm import Mapped, mapped_column, relationship
3434

35+
from macaron.artifact.maven import MavenSubjectPURLMatcher
3536
from macaron.database.database_manager import ORMBase
3637
from macaron.database.rfc3339_datetime import RFC3339DateTime
3738
from macaron.errors import InvalidPURLError
39+
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher
3840
from macaron.slsa_analyzer.slsa_req import ReqName
3941

4042
logger: logging.Logger = logging.getLogger(__name__)
@@ -168,6 +170,13 @@ class Component(PackageURLMixin, ORMBase):
168170
secondaryjoin=components_association_table.c.child_component == id,
169171
)
170172

173+
#: The optional one-to-one relationship with a provenance subject in case this
174+
#: component represents a subject in a provenance.
175+
provenance_subject: Mapped["ProvenanceSubject | None"] = relationship(
176+
back_populates="component",
177+
lazy="immediate",
178+
)
179+
171180
def __init__(self, purl: str, analysis: Analysis, repository: "Repository | None"):
172181
"""
173182
Instantiate the software component using PURL identifier.
@@ -528,3 +537,71 @@ class HashDigest(ORMBase):
528537

529538
#: The many-to-one relationship with artifacts.
530539
artifact: Mapped["ReleaseArtifact"] = relationship(back_populates="digests", lazy="immediate")
540+
541+
542+
class ProvenanceSubject(ORMBase):
    """A subject in a provenance that matches the user-provided PackageURL.

    This subject may be later populated in VSAs during policy verification.
    """

    __tablename__ = "_provenance_subject"

    #: The primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)  # noqa: A003

    #: The component id of the provenance subject.
    component_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("_component.id"),
        nullable=False,
    )

    #: The required one-to-one relationship with a component.
    component: Mapped[Component] = relationship(
        back_populates="provenance_subject",
        lazy="immediate",
    )

    #: The SHA256 hash of the subject.
    sha256: Mapped[str] = mapped_column(String, nullable=False)

    @classmethod
    def from_purl_and_provenance(
        cls,
        purl: PackageURL,
        provenance_payload: InTotoPayload,
    ) -> Self | None:
        """Create a ``ProvenanceSubject`` entry if there is a provenance subject matching the PURL.

        Each known subject matcher is tried in turn; the first one that finds a
        matching subject decides the result.

        Parameters
        ----------
        purl : PackageURL
            The PackageURL identifying the software component being analyzed.
        provenance_payload : InTotoPayload
            The provenance payload.

        Returns
        -------
        Self | None
            A ``ProvenanceSubject`` entry with the SHA256 of the provenance subject matching the
            given PURL, or ``None`` if no matcher finds a subject or the matching subject has
            no SHA256 digest.
        """
        subject_artifact_types: list[ProvenanceSubjectPURLMatcher] = [MavenSubjectPURLMatcher]

        for subject_artifact_type in subject_artifact_types:
            subject = subject_artifact_type.get_subject_in_provenance_matching_purl(
                provenance_payload,
                purl,
            )
            if subject is None:
                # This matcher does not recognise the PURL/provenance pair;
                # give the remaining matchers a chance instead of giving up.
                continue
            digest = subject["digest"]
            if digest is None:
                return None
            sha256 = digest.get("sha256")
            if not sha256:
                return None
            return cls(sha256=sha256)

        return None

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from macaron.config.global_config import global_config
2020
from macaron.config.target_config import Configuration
2121
from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session
22-
from macaron.database.table_definitions import Analysis, Component, Repository
22+
from macaron.database.table_definitions import Analysis, Component, ProvenanceSubject, Repository
2323
from macaron.dependency_analyzer import DependencyAnalyzer, DependencyInfo
2424
from macaron.errors import (
2525
CloneError,
@@ -332,7 +332,12 @@ def run_single(
332332
# Create the component.
333333
component = None
334334
try:
335-
component = self.add_component(analysis, analysis_target, existing_records)
335+
component = self.add_component(
336+
analysis,
337+
analysis_target,
338+
existing_records,
339+
provenance_payload,
340+
)
336341
except PURLNotFoundError as error:
337342
logger.error(error)
338343
return Record(
@@ -484,6 +489,7 @@ def add_component(
484489
analysis: Analysis,
485490
analysis_target: AnalysisTarget,
486491
existing_records: dict[str, Record] | None = None,
492+
provenance_payload: InTotoPayload | None = None,
487493
) -> Component:
488494
"""Add a software component if it does not exist in the DB already.
489495
@@ -547,18 +553,30 @@ def add_component(
547553
raise PURLNotFoundError(
548554
f"The repository {analysis_target.repo_path} is not available and no PURL is provided from the user."
549555
)
550-
551-
repo_snapshot_purl = PackageURL(
556+
purl = PackageURL(
552557
type=repository.type,
553558
namespace=repository.owner,
554559
name=repository.name,
555560
version=repository.commit_sha,
556561
)
557-
return Component(purl=str(repo_snapshot_purl), analysis=analysis, repository=repository)
562+
else:
563+
# If the PURL is available, we always create the software component with it whether the repository is
564+
# available or not.
565+
purl = analysis_target.parsed_purl
566+
567+
component = Component(
568+
purl=purl.to_string(),
569+
analysis=analysis,
570+
repository=repository,
571+
)
572+
573+
if provenance_payload:
574+
component.provenance_subject = ProvenanceSubject.from_purl_and_provenance(
575+
purl=purl,
576+
provenance_payload=provenance_payload,
577+
)
558578

559-
# If the PURL is available, we always create the software component with it whether the repository is
560-
# available or not.
561-
return Component(purl=str(analysis_target.parsed_purl), analysis=analysis, repository=repository)
579+
return component
562580

563581
@staticmethod
564582
def parse_purl(config: Configuration) -> PackageURL | None:

src/macaron/slsa_analyzer/checks/provenance_available_check.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ProvenanceAvailableFacts(CheckFacts):
5757
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
5858

5959
#: The provenance asset name.
60-
asset_name: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT})
60+
asset_name: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.TEXT})
6161

6262
#: The URL for the provenance asset.
6363
asset_url: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.HREF})
@@ -504,6 +504,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
504504
CheckResultData
505505
The result of the check.
506506
"""
507+
if ctx.dynamic_data["provenance"]:
508+
return CheckResultData(
509+
result_tables=[ProvenanceAvailableFacts(confidence=Confidence.HIGH)],
510+
result_type=CheckResultType.PASSED,
511+
)
512+
507513
provenance_extensions = defaults.get_list(
508514
"slsa.verifier",
509515
"provenance_extensions",

src/macaron/slsa_analyzer/checks/provenance_l3_content_check.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
5858
logger.info("%s check was unable to find any expectations.", self.check_info.check_id)
5959
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)
6060

61+
if ctx.dynamic_data["provenance"] and expectation.validate(ctx.dynamic_data["provenance"]):
62+
return CheckResultData(
63+
result_tables=[expectation],
64+
result_type=CheckResultType.PASSED,
65+
)
66+
6167
package_registry_info_entries = ctx.dynamic_data["package_registries"]
6268
ci_services = ctx.dynamic_data["ci_services"]
6369

0 commit comments

Comments
 (0)