Skip to content

Commit 6f9d7f4

Browse files
committed
fix: report known malware even when not labeled
Signed-off-by: behnazh-w <[email protected]>
1 parent 1ea1bd5 commit 6f9d7f4

File tree

10 files changed

+229
-64
lines changed

10 files changed

+229
-64
lines changed

src/macaron/config/defaults.ini

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
[requests]
@@ -538,6 +538,11 @@ registry_url_scheme = https
538538
fileserver_url_netloc = files.pythonhosted.org
539539
fileserver_url_scheme = https
540540

541+
[deps_dev]
542+
url_netloc = api.deps.dev
543+
url_scheme = https
544+
v3alpha_purl_endpoint = v3alpha/purl
545+
541546
# Configuration options for selecting the checks to run.
542547
# Both the exclude and include are defined as list of strings:
543548
# - The exclude list is used to specify the checks that will not run.

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This check examines the metadata of pypi packages with seven heuristics."""
@@ -11,7 +11,7 @@
1111

1212
from macaron.database.db_custom_types import DBJsonDict
1313
from macaron.database.table_definitions import CheckFacts
14-
from macaron.errors import HeuristicAnalyzerValueError
14+
from macaron.errors import HeuristicAnalyzerValueError, InvalidHTTPResponseError
1515
from macaron.json_tools import JsonType, json_extract
1616
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1717
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
@@ -28,6 +28,7 @@
2828
from macaron.slsa_analyzer.build_tool.poetry import Poetry
2929
from macaron.slsa_analyzer.checks.base_check import BaseCheck
3030
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
31+
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService
3132
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
3233
from macaron.slsa_analyzer.registry import registry
3334
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
@@ -177,7 +178,7 @@ def __init__(self) -> None:
177178
"""Initialize a check instance."""
178179
check_id = "mcn_detect_malicious_metadata_1"
179180
description = """This check analyzes the metadata of a package based on reports malicious behavior.
180-
Supported ecosystem: PyPI.
181+
Supported ecosystem for unknown malware: PyPI.
181182
"""
182183
super().__init__(check_id=check_id, description=description, eval_reqs=[])
183184

@@ -259,21 +260,28 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
259260
The result of the check.
260261
"""
261262
result_tables: list[CheckFacts] = []
262-
# First check if this package is a known malware
263+
package_registry_info_entries = ctx.dynamic_data["package_registries"]
263264

265+
# First check if this package is a known malware
264266
url = "https://api.osv.dev/v1/query"
265267
data = {"package": {"purl": ctx.component.purl}}
266-
response = send_post_http_raw(url, json_data=data, headers=None)
267-
res_obj = None
268-
if response:
269-
try:
270-
res_obj = response.json()
271-
except requests.exceptions.JSONDecodeError as error:
272-
logger.debug("Unable to get a valid response from %s: %s", url, error)
273-
if res_obj:
274-
for vuln in res_obj.get("vulns", {}):
275-
v_id = json_extract(vuln, ["id"], str)
276-
if v_id and v_id.startswith("MAL-"):
268+
269+
try:
270+
package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
271+
except InvalidHTTPResponseError as error:
272+
logger.debug(error)
273+
274+
if not package_exists:
275+
response = send_post_http_raw(url, json_data=data, headers=None)
276+
res_obj = None
277+
if response:
278+
try:
279+
res_obj = response.json()
280+
except requests.exceptions.JSONDecodeError as error:
281+
logger.debug("Unable to get a valid response from %s: %s", url, error)
282+
if res_obj:
283+
for vuln in res_obj.get("vulns", {}):
284+
v_id = json_extract(vuln, ["id"], str)
277285
result_tables.append(
278286
MaliciousMetadataFacts(
279287
known_malware=f"https://osv.dev/vulnerability/{v_id}",
@@ -282,13 +290,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
282290
confidence=Confidence.HIGH,
283291
)
284292
)
285-
if result_tables:
286-
return CheckResultData(
287-
result_tables=result_tables,
288-
result_type=CheckResultType.FAILED,
289-
)
293+
if result_tables:
294+
return CheckResultData(
295+
result_tables=result_tables,
296+
result_type=CheckResultType.FAILED,
297+
)
290298

291-
package_registry_info_entries = ctx.dynamic_data["package_registries"]
292299
for package_registry_info_entry in package_registry_info_entries:
293300
match package_registry_info_entry:
294301
case PackageRegistryInfo(
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module contains implementation of deps.dev service."""
5+
6+
import json
7+
import logging
8+
import urllib.parse
9+
from urllib.parse import quote as encode
10+
11+
import requests
12+
13+
from macaron.config.defaults import defaults
14+
from macaron.errors import ConfigurationError, InvalidHTTPResponseError
15+
from macaron.util import send_get_http_raw
16+
17+
logger: logging.Logger = logging.getLogger(__name__)
18+
19+
20+
class DepsDevService:
    """The deps.dev service class."""

    @staticmethod
    def get_package_info(purl: str) -> dict | None:
        """Check if the package identified by the PackageURL (PURL) exists and return its information.

        The API location is read from the ``[deps_dev]`` section of the configuration
        (``url_netloc``, ``url_scheme`` and ``v3alpha_purl_endpoint``).

        Parameters
        ----------
        purl: str
            The PackageURL (PURL).

        Returns
        -------
        dict | None
            The package metadata, or None if the ``[deps_dev]`` section is not configured
            or the request yields no response body.

        Raises
        ------
        ConfigurationError
            If the ``url_netloc`` or ``v3alpha_purl_endpoint`` key is missing in the ``[deps_dev]`` section.
        InvalidHTTPResponseError
            If the API URL cannot be constructed, or the response body is not valid JSON or is empty.
        """
        section_name = "deps_dev"
        if not defaults.has_section(section_name):
            return None
        section = defaults[section_name]

        url_netloc = section.get("url_netloc")
        if not url_netloc:
            raise ConfigurationError(
                f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.'
            )
        url_scheme = section.get("url_scheme", "https")
        v3alpha_purl_endpoint = section.get("v3alpha_purl_endpoint")
        if not v3alpha_purl_endpoint:
            raise ConfigurationError(
                f'The "v3alpha_purl_endpoint" key is missing in section [{section_name}] of the .ini configuration file.'
            )

        # The PURL is a single path segment, so every reserved character (including "/") is percent-encoded.
        path_params = "/".join([v3alpha_purl_endpoint, encode(purl, safe="")])
        try:
            url = urllib.parse.urlunsplit(
                urllib.parse.SplitResult(
                    scheme=url_scheme,
                    netloc=url_netloc,
                    path=path_params,
                    query="",
                    fragment="",
                )
            )
        except ValueError as error:
            raise InvalidHTTPResponseError("Failed to construct the API URL.") from error

        response = send_get_http_raw(url)
        if response and response.text:
            try:
                metadata: dict = json.loads(response.text)
            # json.loads raises json.JSONDecodeError. requests.exceptions.JSONDecodeError is a
            # subclass of it, so catching the requests variant would let the stdlib error escape.
            except json.JSONDecodeError as error:
                raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
            if not metadata:
                raise InvalidHTTPResponseError(f"Empty response returned by {url} .")
            return metadata

        return None

src/macaron/slsa_analyzer/package_registry/package_registry.py

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
1-
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module defines package registries."""
55

6-
import json
76
import logging
8-
import urllib.parse
97
from abc import ABC, abstractmethod
108
from datetime import datetime
11-
from urllib.parse import quote as encode
12-
13-
import requests
149

1510
from macaron.errors import InvalidHTTPResponseError
1611
from macaron.json_tools import json_extract
1712
from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool
18-
from macaron.util import send_get_http_raw
13+
from macaron.slsa_analyzer.package_registry.deps_dev import DepsDevService
1914

2015
logger: logging.Logger = logging.getLogger(__name__)
2116

@@ -50,7 +45,7 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool:
5045
based on the given build tool.
5146
"""
5247

53-
def find_publish_timestamp(self, purl: str, registry_url: str | None = None) -> datetime:
48+
def find_publish_timestamp(self, purl: str) -> datetime:
5449
"""Retrieve the publication timestamp for a package specified by its purl from the deps.dev repository by default.
5550
5651
This method constructs a request URL based on the provided purl, sends an HTTP GET
@@ -65,8 +60,6 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
6560
purl: str
6661
The Package URL (purl) of the package whose publication timestamp is to be retrieved.
6762
This should conform to the PURL specification.
68-
registry_url: str | None
69-
The registry URL that can be set for testing.
7063
7164
Returns
7265
-------
@@ -86,40 +79,20 @@ def find_publish_timestamp(self, purl: str, registry_url: str | None = None) ->
8679
# in the AnalyzeContext object retrieved by the Repo Finder. This step should be
8780
# implemented at the beginning of the analyze command to ensure that the data
8881
# is available for subsequent processing.
89-
90-
base_url_parsed = urllib.parse.urlparse(registry_url or "https://api.deps.dev")
91-
path_params = "/".join(["v3alpha", "purl", encode(purl, safe="")])
9282
try:
93-
url = urllib.parse.urlunsplit(
94-
urllib.parse.SplitResult(
95-
scheme=base_url_parsed.scheme,
96-
netloc=base_url_parsed.netloc,
97-
path=path_params,
98-
query="",
99-
fragment="",
100-
)
101-
)
102-
except ValueError as error:
103-
raise InvalidHTTPResponseError("Failed to construct the API URL.") from error
104-
105-
response = send_get_http_raw(url)
106-
if response and response.text:
107-
try:
108-
metadata: dict = json.loads(response.text)
109-
except requests.exceptions.JSONDecodeError as error:
110-
raise InvalidHTTPResponseError(f"Failed to process response from deps.dev for {url}.") from error
111-
if not metadata:
112-
raise InvalidHTTPResponseError(f"Empty response returned by {url} .")
113-
83+
metadata = DepsDevService.get_package_info(purl)
84+
except InvalidHTTPResponseError as error:
85+
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.") from error
86+
if metadata:
11487
timestamp = json_extract(metadata, ["version", "publishedAt"], str)
11588
if not timestamp:
116-
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned by {url}.")
89+
raise InvalidHTTPResponseError(f"The timestamp is missing in the response returned for {purl}.")
11790

11891
logger.debug("Found timestamp: %s.", timestamp)
11992

12093
try:
12194
return datetime.fromisoformat(timestamp)
12295
except ValueError as error:
123-
raise InvalidHTTPResponseError(f"The timestamp returned by {url} is invalid") from error
96+
raise InvalidHTTPResponseError(f"The timestamp returned for {purl} is invalid") from error
12497

125-
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {url}.")
98+
raise InvalidHTTPResponseError(f"Invalid response from deps.dev for {purl}.")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
7+
check_passed(component_id, "mcn_detect_malicious_metadata_1").
8+
9+
apply_policy_to("check-malicious-package", component_id) :-
10+
is_component(component_id, "pkg:pypi/ultralytics").
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
description: |
5+
Analyzing a popular package, some versions of which are compromised.
6+
7+
tags:
8+
- macaron-python-package
9+
- macaron-docker-image
10+
11+
steps:
12+
- name: Run macaron analyze
13+
kind: analyze
14+
options:
15+
command_args:
16+
- -purl
17+
- pkg:pypi/ultralytics
18+
- name: Run macaron verify-policy to verify that the malicious metadata check passes.
19+
kind: verify
20+
options:
21+
policy: policy.dl
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */
2+
/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
3+
4+
#include "prelude.dl"
5+
6+
Policy("check-malicious-package", component_id, "Check the malicious package.") :-
7+
check_failed(component_id, "mcn_detect_malicious_metadata_1").
8+
9+
apply_policy_to("check-malicious-package", component_id) :-
10+
is_component(component_id, "pkg:pypi/[email protected]").
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
description: |
5+
Analyzing a known malicious package.
6+
7+
tags:
8+
- macaron-python-package
9+
- macaron-docker-image
10+
11+
steps:
12+
- name: Run macaron analyze
13+
kind: analyze
14+
options:
15+
command_args:
16+
- -purl
17+
- pkg:pypi/[email protected]
18+
- name: Run macaron verify-policy to verify that the malicious metadata check fails.
19+
kind: verify
20+
options:
21+
policy: policy.dl

tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""Module to test the malicious metadata detection check."""
@@ -62,6 +62,10 @@ def test_detect_malicious_metadata(
6262
registry_url_scheme = {base_url_parsed.scheme}
6363
fileserver_url_netloc = {base_url_parsed.netloc}
6464
fileserver_url_scheme = {base_url_parsed.scheme}
65+
66+
[deps_dev]
67+
url_netloc = {base_url_parsed.netloc}
68+
url_scheme = {base_url_parsed.scheme}
6569
"""
6670
user_config_path = os.path.join(tmp_path, "config.ini")
6771
with open(user_config_path, "w", encoding="utf-8") as user_config_file:

0 commit comments

Comments
 (0)