Skip to content

Commit 69b6bd8

Browse files
committed
feat: added in loading from defaults.ini. Removed minor version checking until testing suggests it is needed
1 parent 6a4ab61 commit 69b6bd8

File tree

2 files changed

+33
-16
lines changed

2 files changed

+33
-16
lines changed

src/macaron/config/defaults.ini

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,3 +579,10 @@ releases_frequency_threshold = 2
579579
# The gap threshold.
580580
# The timedelta indicates the gap between the date the maintainer registered their PyPI account and the date of the latest release.
581581
timedelta_threshold_of_join_release = 5
582+
583+
# Any major version above this value is detected as anomalistic and marked as suspicious
584+
major_threshold = 20
585+
# Any epoch number above this value is detected as anomalistic and marked as suspicious
586+
epoch_threshold = 5
587+
# The number of days (+/-) by which the calendar versioning day may differ from the day of publish
588+
day_publish_error = 2

src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalistic_version.py

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from packaging.version import InvalidVersion, parse
1010

11+
from macaron.config.defaults import defaults
1112
from macaron.errors import HeuristicAnalyzerValueError
1213
from macaron.json_tools import JsonType, json_extract
1314
from macaron.malware_analyzer.datetime_parser import parse_datetime
@@ -35,12 +36,7 @@ class AnomalisticVersionAnalyzer(BaseHeuristicAnalyzer):
3536
All other versionings are detected as semantic versioning.
3637
"""
3738

38-
DATETIME_FORMAT: str = "%Y-%m-%dT%H:%M:%S"
39-
40-
MAJOR_THRESHOLD: int = 20
41-
MINOR_THRESHOLD: int = 40
42-
EPOCH_THRESHOLD: int = 5
43-
39+
CALENDAR_YEAR_LENGTHS: list[int] = [2, 4]
4440
DETAIL_INFO_KEY: str = "versioning"
4541

4642
def __init__(self) -> None:
@@ -49,6 +45,20 @@ def __init__(self) -> None:
4945
heuristic=Heuristics.ANOMALISTIC_VERSION,
5046
depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.FAIL)],
5147
)
48+
self.major_threshold, self.epoch_threshold, self.day_publish_error = self._load_defaults()
49+
50+
def _load_defaults(self) -> tuple[int, int, int]:
    """Load the anomalistic version thresholds from defaults.ini.

    The values are read from the ``heuristic.pypi`` section. If the section is
    absent, or an individual key is missing from it, the built-in value for
    that setting is used instead.

    Returns
    -------
    tuple[int, int, int]
        The major threshold, the epoch threshold, and the day publish error,
        in that order.
    """
    section_name = "heuristic.pypi"
    # Built-in values, used when the section or a single key is not configured.
    major_threshold, epoch_threshold, day_publish_error = 20, 5, 2

    if defaults.has_section(section_name):
        section = defaults[section_name]
        # Without an explicit fallback, getint() returns None for a missing key,
        # which would break the integer comparisons performed during analysis.
        return (
            section.getint("major_threshold", fallback=major_threshold),
            section.getint("epoch_threshold", fallback=epoch_threshold),
            section.getint("day_publish_error", fallback=day_publish_error),
        )

    # Major threshold, Epoch threshold, Day publish error
    return major_threshold, epoch_threshold, day_publish_error
5262

5363
def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
5464
"""Analyze the package.
@@ -103,7 +113,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
103113

104114
calendar_semantic = False
105115

106-
if len(str(version.major)) == 4 or len(str(version.major)) == 2:
116+
if len(str(version.major)) in self.CALENDAR_YEAR_LENGTHS:
107117
# possible this version number refers to a date
108118

109119
for distribution in release_metadata:
@@ -113,7 +123,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
113123
logger.debug(error_msg)
114124
raise HeuristicAnalyzerValueError(error_msg)
115125

116-
parsed_time = parse_datetime(upload_time, self.DATETIME_FORMAT)
126+
parsed_time = parse_datetime(upload_time)
117127
if parsed_time is None:
118128
error_msg = "Upload time is not of the expected PyPI format"
119129
logger.debug(error_msg)
@@ -123,12 +133,14 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
123133
# the major of the version refers to the year published
124134
if (
125135
parsed_time.month == version.minor
126-
and parsed_time.day + 2 >= version.micro >= parsed_time.day - 2
136+
and parsed_time.day + self.day_publish_error
137+
>= version.micro
138+
>= parsed_time.day - self.day_publish_error
127139
and len(version.release) == 3
128140
):
129141
# In the format of full_year.month.day or year.month.day, with a buffer of day_publish_error days (2 by default) for timezone differences
130142
detail_info: dict[str, JsonType] = {self.DETAIL_INFO_KEY: Versioning.CALENDAR.value}
131-
if version.epoch > self.EPOCH_THRESHOLD:
143+
if version.epoch > self.epoch_threshold:
132144
return HeuristicResult.FAIL, detail_info
133145

134146
return HeuristicResult.PASS, detail_info
@@ -138,21 +150,19 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
138150
if calendar_semantic:
139151
detail_info = {self.DETAIL_INFO_KEY: Versioning.CALENDAR_SEMANTIC.value}
140152
# analyze starting from the minor instead
141-
if version.epoch > self.EPOCH_THRESHOLD:
153+
if version.epoch > self.epoch_threshold:
142154
return HeuristicResult.FAIL, detail_info
143-
if version.minor > self.MAJOR_THRESHOLD:
155+
if version.minor > self.major_threshold:
144156
return HeuristicResult.FAIL, detail_info
145157

146158
return HeuristicResult.PASS, detail_info
147159

148160
# semantic versioning
149161
detail_info = {self.DETAIL_INFO_KEY: Versioning.SEMANTIC.value}
150162

151-
if version.epoch > self.EPOCH_THRESHOLD:
152-
return HeuristicResult.FAIL, detail_info
153-
if version.major > self.MAJOR_THRESHOLD:
163+
if version.epoch > self.epoch_threshold:
154164
return HeuristicResult.FAIL, detail_info
155-
if version.minor > self.MINOR_THRESHOLD:
165+
if version.major > self.major_threshold:
156166
return HeuristicResult.FAIL, detail_info
157167

158168
return HeuristicResult.PASS, detail_info

0 commit comments

Comments
 (0)