Skip to content

Commit 7313899

Browse files
authored
fix: report known malware even when not labeled (#956)
Signed-off-by: behnazh-w <[email protected]>
1 parent 4ed5561 commit 7313899

File tree

15 files changed

+326
-75
lines changed

15 files changed

+326
-75
lines changed

docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.package_registry.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ macaron.slsa\_analyzer.package\_registry package
99
Submodules
1010
----------
1111

12+
macaron.slsa\_analyzer.package\_registry.deps\_dev module
13+
---------------------------------------------------------
14+
15+
.. automodule:: macaron.slsa_analyzer.package_registry.deps_dev
16+
:members:
17+
:undoc-members:
18+
:show-inheritance:
19+
1220
macaron.slsa\_analyzer.package\_registry.jfrog\_maven\_registry module
1321
----------------------------------------------------------------------
1422

src/macaron/config/defaults.ini

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,11 @@ fileserver_url_scheme = https
540540
inspector_url_netloc = inspector.pypi.io
541541
inspector_url_scheme = https
542542

543+
[deps_dev]
544+
url_netloc = api.deps.dev
545+
url_scheme = https
546+
purl_endpoint = v3alpha/purl
547+
543548
# Configuration options for selecting the checks to run.
544549
# Both the exclude and include are defined as list of strings:
545550
# - The exclude list is used to specify the checks that will not run.

src/macaron/errors.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module contains error classes for Macaron."""
@@ -56,6 +56,17 @@ class InvalidHTTPResponseError(MacaronError):
5656
"""Happens when the HTTP response is invalid or unexpected."""
5757

5858

59+
class APIAccessError(MacaronError):
    """Raised when a service API cannot be accessed.

    Typical causes are:

    * the service is misconfigured
    * the API request is invalid
    * a network error occurs
    * the API returns an unexpected response
    """
68+
69+
5970
class CheckRegistryError(MacaronError):
6071
"""The Check Registry Error class."""
6172

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from macaron.slsa_analyzer.build_tool.poetry import Poetry
3030
from macaron.slsa_analyzer.checks.base_check import BaseCheck
3131
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
32+
from macaron.slsa_analyzer.package_registry.deps_dev import APIAccessError, DepsDevService
3233
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
3334
from macaron.slsa_analyzer.registry import registry
3435
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
@@ -182,7 +183,7 @@ def __init__(self) -> None:
182183
"""Initialize a check instance."""
183184
check_id = "mcn_detect_malicious_metadata_1"
184185
description = """This check analyzes the metadata of a package based on reports malicious behavior.
185-
Supported ecosystem: PyPI.
186+
Supported ecosystem for unknown malware: PyPI.
186187
"""
187188
super().__init__(check_id=check_id, description=description, eval_reqs=[])
188189

@@ -288,37 +289,46 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
288289
The result of the check.
289290
"""
290291
result_tables: list[CheckFacts] = []
291-
# First check if this package is a known malware
292+
package_registry_info_entries = ctx.dynamic_data["package_registries"]
292293

294+
# First check if this package is a known malware
293295
data = {"package": {"purl": ctx.component.purl}}
294-
response = send_post_http_raw(self.osv_query_url, json_data=data, headers=None)
295-
res_obj = None
296-
if response:
297-
try:
298-
res_obj = response.json()
299-
except requests.exceptions.JSONDecodeError as error:
300-
logger.debug("Unable to get a valid response from %s: %s", self.osv_query_url, error)
301-
if res_obj:
302-
for vuln in res_obj.get("vulns", {}):
303-
v_id = json_extract(vuln, ["id"], str)
304-
if v_id and v_id.startswith("MAL-"):
305-
result_tables.append(
306-
MaliciousMetadataFacts(
307-
known_malware=f"https://osv.dev/vulnerability/{v_id}",
308-
result={},
309-
detail_information=vuln,
310-
confidence=Confidence.HIGH,
296+
297+
try:
298+
package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
299+
except APIAccessError as error:
300+
logger.debug(error)
301+
302+
# Known malicious packages must have been removed.
303+
if not package_exists:
304+
response = send_post_http_raw(self.osv_query_url, json_data=data, headers=None)
305+
res_obj = None
306+
if response:
307+
try:
308+
res_obj = response.json()
309+
except requests.exceptions.JSONDecodeError as error:
310+
logger.debug("Unable to get a valid response from %s: %s", self.osv_query_url, error)
311+
if res_obj:
312+
for vuln in res_obj.get("vulns", {}):
313+
if v_id := json_extract(vuln, ["id"], str):
314+
result_tables.append(
315+
MaliciousMetadataFacts(
316+
known_malware=f"https://osv.dev/vulnerability/{v_id}",
317+
result={},
318+
detail_information=vuln,
319+
confidence=Confidence.HIGH,
320+
)
311321
)
322+
if result_tables:
323+
return CheckResultData(
324+
result_tables=result_tables,
325+
result_type=CheckResultType.FAILED,
312326
)
313-
if result_tables:
314-
return CheckResultData(
315-
result_tables=result_tables,
316-
result_type=CheckResultType.FAILED,
317-
)
318327

319-
package_registry_info_entries = ctx.dynamic_data["package_registries"]
328+
# If the package is not a known malware, run malware analysis heuristics.
320329
for package_registry_info_entry in package_registry_info_entries:
321330
match package_registry_info_entry:
331+
# Currently, only PyPI packages are supported.
322332
case PackageRegistryInfo(
323333
build_tool=Pip() | Poetry(),
324334
package_registry=PyPIRegistry() as pypi_registry,
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module contains implementation of deps.dev service."""
5+
6+
import json
7+
import logging
8+
import urllib.parse
9+
from json.decoder import JSONDecodeError
10+
from urllib.parse import quote as encode
11+
12+
from macaron.config.defaults import defaults
13+
from macaron.errors import APIAccessError
14+
from macaron.util import send_get_http_raw
15+
16+
logger: logging.Logger = logging.getLogger(__name__)
17+
18+
19+
class DepsDevService:
    """The deps.dev service class."""

    @staticmethod
    def get_package_info(purl: str) -> dict | None:
        """Look up the package identified by the PackageURL (PURL) on deps.dev and return its metadata.

        The request URL is built from the ``url_scheme``, ``url_netloc`` and ``purl_endpoint``
        keys of the ``[deps_dev]`` section in the .ini configuration, followed by the
        percent-encoded PURL.

        Parameters
        ----------
        purl: str
            The PackageURL (PURL).

        Returns
        -------
        dict | None
            The package metadata decoded from the JSON response. ``None`` is returned when the
            ``[deps_dev]`` section is absent from the configuration, or when the GET request
            yields no response or a response with an empty body.

        Raises
        ------
        APIAccessError
            If the ``url_netloc`` or ``purl_endpoint`` key is missing, the API URL cannot be
            constructed, or the response body is not valid JSON, is empty once decoded,
            or is not a JSON object.
        """
        section_name = "deps_dev"
        if not defaults.has_section(section_name):
            return None
        section = defaults[section_name]

        url_netloc = section.get("url_netloc")
        if not url_netloc:
            raise APIAccessError(
                f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.'
            )
        url_scheme = section.get("url_scheme", "https")
        purl_endpoint = section.get("purl_endpoint")
        if not purl_endpoint:
            raise APIAccessError(
                f'The "purl_endpoint" key is missing in section [{section_name}] of the .ini configuration file.'
            )

        # The PURL is embedded as a single path segment, so every reserved character
        # (including "/") must be percent-encoded.
        path_params = "/".join([purl_endpoint, encode(purl, safe="")])
        try:
            url = urllib.parse.urlunsplit(
                urllib.parse.SplitResult(
                    scheme=url_scheme,
                    netloc=url_netloc,
                    path=path_params,
                    query="",
                    fragment="",
                )
            )
        except ValueError as error:
            raise APIAccessError("Failed to construct the API URL.") from error

        response = send_get_http_raw(url)
        if response and response.text:
            try:
                metadata = json.loads(response.text)
            except JSONDecodeError as error:
                raise APIAccessError(f"Failed to process response from deps.dev for {url}.") from error
            if not metadata:
                raise APIAccessError(f"Empty response returned by {url} .")
            # json.loads can also produce a list, string or number; callers rely on the
            # declared ``dict`` return type, so anything else is an unexpected response.
            if not isinstance(metadata, dict):
                raise APIAccessError(f"Unexpected response returned by {url}: expected a JSON object.")
            return metadata

        return None

src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""Assets on a package registry."""
@@ -816,7 +816,7 @@ def download_asset(self, url: str, dest: str) -> bool:
816816

817817
return True
818818

819-
def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
819+
def find_publish_timestamp(self, purl: str) -> datetime:
820820
"""Make a search request to Maven Central to find the publishing timestamp of an artifact.
821821
822822
The reason for directly fetching timestamps from Maven Central is that deps.dev occasionally
@@ -829,8 +829,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
829829
purl: str
830830
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
831831
This should conform to the PURL specification.
832-
registry_url: str | None
833-
The registry URL that can be set for testing.
834832
835833
Returns
836834
-------

src/macaron/slsa_analyzer/package_registry/maven_central_registry.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""The module provides abstractions for the Maven Central package registry."""
@@ -182,7 +182,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
182182
compatible_build_tool_classes = [Maven, Gradle]
183183
return any(isinstance(build_tool, build_tool_class) for build_tool_class in compatible_build_tool_classes)
184184

185-
def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
185+
def find_publish_timestamp(self, purl: str) -> datetime:
186186
"""Make a search request to Maven Central to find the publishing timestamp of an artifact.
187187
188188
The reason for directly fetching timestamps from Maven Central is that deps.dev occasionally
@@ -195,8 +195,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
195195
purl: str
196196
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
197197
This should conform to the PURL specification.
198-
registry_url: str | None
199-
The registry URL that can be set for testing.
200198
201199
Returns
202200
-------

src/macaron/slsa_analyzer/package_registry/package_registry.py

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
1-
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module defines package registries."""
55

6-
import json
76
import logging
8-
import urllib.parse
97
from abc import ABC, abstractmethod
108
from datetime import datetime
11-
from urllib.parse import quote as encode
12-
13-
import requests
149

1510
from macaron.errors import InvalidHTTPResponseError
1611
from macaron.json_tools import json_extract
1712
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
18-
from macaron.util import send_get_http_raw
13+
from macaron.slsa_analyzer.package_registry.deps_dev import APIAccessError, DepsDevService
1914

2015
logger: logging.Logger = logging.getLogger(__name__)
2116

@@ -50,7 +45,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
5045
based on the given build tool.
5146
"""
5247

53-
def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
48+
def find_publish_timestamp(self, purl: str) -> datetime:
5449
"""Retrieve the publication timestamp for a package specified by its purl from the deps.dev repository by default.
5550
5651
This method constructs a request URL based on the provided purl, sends an HTTP GET
@@ -65,8 +60,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
6560
purl: str
6661
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
6762
This should conform to the PURL specification.
68-
registry_url: str | None
69-
The registry URL that can be set for testing.
7063
7164
Returns
7265
-------
@@ -86,40 +79,20 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
8679
# in the AnalyzeContext object retrieved by the Repo Finder. This step should be
8780
# implemented at the beginning of the analyze command to ensure that the data
8881
# is available for subsequent processing.
89-
90-
base_url_parsed = urllib.parse.urlparse(registry_url or "https://api.deps.dev")
91-
path_params = "/".join(["v3alpha", "purl", encode(purl, safe="")])
9282
try:
93-
url = urllib.parse.urlunsplit(
94-
urllib.parse.SplitResult(
95-
scheme=base_url_parsed.scheme,
96-
netloc=base_url_parsed.netloc,
97-
path=path_params,
98-
query="",
99-
fragment="",
100-
)
101-
)
102-
except ValueError as error:
103-
raise InvalidHTTPResponseError("Failed to construct the API URL.") from error
104-
105-
response = send_get_http_raw(url)
106-
if response and response.text:
107-
try:
108-
metadata: dict = json.loads(response.text)
109-
except requests.exceptions.JSONDecodeError as error:
110-
raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
111-
if not metadata:
112-
raise InvalidHTTPResponseError(f"Empty response returned by {url} .")
113-
83+
metadata = DepsDevService.get_package_info(purl)
84+
except APIAccessError as error:
85+
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.") from error
86+
if metadata:
11487
timestamp = json_extract(metadata, ["version", "publishedAt"], str)
11588
if not timestamp:
116-
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned by {url}.")
89+
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned for {purl}.")
11790

11891
logger.debug("Found timestamp: %s.", timestamp)
11992

12093
try:
12194
return datetime.fromisoformat(timestamp)
12295
except ValueError as error:
123-
raise InvalidHTTPResponseError(f"The timestamp returned by {url} is invalid") from error
96+
raise InvalidHTTPResponseError(f"The timestamp returned for {purl} is invalid") from error
12497

125-
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {url}.")
98+
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
/* The policy is derived for a component when the malicious metadata check has passed for it. */
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
    check_passed(component_id, "mcn_detect_malicious_metadata_1").

/* Apply the policy to the component matched by the PURL pkg:pypi/ultralytics. */
apply_policy_to("check-malicious-package", component_id) :-
    is_component(component_id, "pkg:pypi/ultralytics").
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
description: |
5+
Analyzing a popular package for which some versions are compromised.
6+
7+
tags:
8+
- macaron-python-package
9+
- macaron-docker-image
10+
11+
steps:
12+
- name: Run macaron analyze
13+
kind: analyze
14+
options:
15+
command_args:
16+
- -purl
17+
- pkg:pypi/ultralytics
18+
- name: Run macaron verify-policy to verify that the malicious metadata check passes.
19+
kind: verify
20+
options:
21+
policy: policy.dl

0 commit comments

Comments
 (0)