Skip to content

Commit ba54226

Browse files
committed
feat: add support for different date representations
1 parent 4d4ab62 commit ba54226

File tree

2 files changed

+129
-50
lines changed

2 files changed

+129
-50
lines changed

src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalistic_version.py

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ class AnomalisticVersionAnalyzer(BaseHeuristicAnalyzer):
3636
All other versionings are detected as semantic versioning.
3737
"""
3838

39-
CALENDAR_YEAR_LENGTHS: list[int] = [2, 4]
4039
DETAIL_INFO_KEY: str = "versioning"
4140

4241
def __init__(self) -> None:
@@ -111,44 +110,59 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
111110
except InvalidVersion:
112111
return HeuristicResult.SKIP, {self.DETAIL_INFO_KEY: Versioning.INVALID.value}
113112

113+
years = []
114+
months = []
115+
publish_days = []
116+
117+
for distribution in release_metadata:
118+
upload_time = json_extract(distribution, ["upload_time"], str)
119+
if upload_time is None:
120+
error_msg = "Missing upload time from release information"
121+
logger.debug(error_msg)
122+
raise HeuristicAnalyzerValueError(error_msg)
123+
124+
parsed_time = parse_datetime(upload_time)
125+
if parsed_time is None:
126+
error_msg = "Upload time is not of the expected PyPI format"
127+
logger.debug(error_msg)
128+
raise HeuristicAnalyzerValueError(error_msg)
129+
130+
years.append(parsed_time.year)
131+
years.append(parsed_time.year % 100) # last 2 digits
132+
months.append(parsed_time.month)
133+
publish_days.append(parsed_time.day)
134+
135+
days = list(range(min(publish_days) - self.day_publish_error, max(publish_days) + self.day_publish_error + 1))
136+
137+
calendar = False
114138
calendar_semantic = False
115139

116-
if len(str(version.major)) in self.CALENDAR_YEAR_LENGTHS:
117-
# possible this version number refers to a date
118-
119-
for distribution in release_metadata:
120-
upload_time = json_extract(distribution, ["upload_time"], str)
121-
if upload_time is None:
122-
error_msg = "Missing upload time from release information"
123-
logger.debug(error_msg)
124-
raise HeuristicAnalyzerValueError(error_msg)
125-
126-
parsed_time = parse_datetime(upload_time)
127-
if parsed_time is None:
128-
error_msg = "Upload time is not of the expected PyPI format"
129-
logger.debug(error_msg)
130-
raise HeuristicAnalyzerValueError(error_msg)
131-
132-
if version.major in (parsed_time.year, parsed_time.year % 100):
133-
# the major of the version refers to the year published
134-
if (
135-
parsed_time.month == version.minor
136-
and parsed_time.day + self.day_publish_error
137-
>= version.micro
138-
>= parsed_time.day - self.day_publish_error
139-
):
140-
# In the format of full_year.month.day or year.month.day, with a buffer for the day
141-
detail_info: dict[str, JsonType] = {self.DETAIL_INFO_KEY: Versioning.CALENDAR.value}
142-
if version.epoch > self.epoch_threshold:
143-
return HeuristicResult.FAIL, detail_info
144-
145-
return HeuristicResult.PASS, detail_info
146-
147-
calendar_semantic = True
148-
149-
if calendar_semantic:
140+
# check for year YY[YY]...
141+
if version.major in years:
142+
# calendar versioning: YY[YY].(M[M].D[D])(D[D].M[M])...
143+
if (version.minor in months and version.micro in days) or (
144+
version.minor in days and version.micro in months
145+
):
146+
calendar = True
147+
else:
148+
calendar_semantic = True
149+
# check for calendar versioning: M[M].D[D].YY[YY]... or D[D].M[M].YY[YY]...
150+
elif (
151+
(version.major in months and version.minor in days) or (version.major in days and version.minor in months)
152+
) and version.micro in years:
153+
# must include day and year for this to be calendar
154+
calendar = True
155+
156+
if calendar: # just check epoch
157+
detail_info: dict[str, JsonType] = {self.DETAIL_INFO_KEY: Versioning.CALENDAR.value}
158+
if version.epoch > self.epoch_threshold:
159+
return HeuristicResult.FAIL, detail_info
160+
161+
return HeuristicResult.PASS, detail_info
162+
163+
if calendar_semantic: # check minor (as major) and epoch
150164
detail_info = {self.DETAIL_INFO_KEY: Versioning.CALENDAR_SEMANTIC.value}
151-
# analyze starting from the minor instead
165+
152166
if version.epoch > self.epoch_threshold:
153167
return HeuristicResult.FAIL, detail_info
154168
if version.minor > self.major_threshold:

tests/malware_analyzer/pypi/test_anomalistic_version.py

Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from macaron.errors import HeuristicAnalyzerValueError
1010
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
11-
from macaron.malware_analyzer.pypi_heuristics.metadata.anomalistic_version import AnomalisticVersionAnalyzer
11+
from macaron.malware_analyzer.pypi_heuristics.metadata.anomalistic_version import AnomalisticVersionAnalyzer, Versioning
1212

1313

1414
def test_analyze_no_information(pypi_package_json: MagicMock) -> None:
@@ -24,34 +24,99 @@ def test_analyze_no_information(pypi_package_json: MagicMock) -> None:
2424
@pytest.mark.parametrize(
2525
("version", "upload_date", "result", "versioning"),
2626
[
27-
pytest.param("2016-10-13", "2016-10-13", HeuristicResult.SKIP, "invalid", id="test_invalid_version"),
28-
pytest.param("2016.10.12.7.3.5", "2016-10-13", HeuristicResult.PASS, "calendar", id="test_calendar_pass"),
29-
pytest.param("2!16.10.14.2.5.3", "2016-10-13", HeuristicResult.PASS, "calendar", id="test_calendar_epoch_pass"),
30-
pytest.param("100!2016.10.14", "2016-10-13", HeuristicResult.FAIL, "calendar", id="test_calendar_epoch_fail"),
3127
pytest.param(
32-
"2016.7.2", "2016-10-13", HeuristicResult.PASS, "calendar_semantic", id="test_calendar_semantic_pass"
28+
"2016-10-13", "2016-10-13", HeuristicResult.SKIP, Versioning.INVALID.value, id="test_invalid_version"
3329
),
3430
pytest.param(
35-
"2016.100.0", "2016-10-13", HeuristicResult.FAIL, "calendar_semantic", id="test_calendar_semantic_fail"
31+
"2016.10.11",
32+
"2016-10-13",
33+
HeuristicResult.PASS,
34+
Versioning.CALENDAR.value,
35+
id="test_calendar_YYYY.MM.DD_pass",
36+
),
37+
pytest.param(
38+
"2016.12.10",
39+
"2016-10-13",
40+
HeuristicResult.PASS,
41+
Versioning.CALENDAR.value,
42+
id="test_calendar_YYYY.DD.MM_pass",
43+
),
44+
pytest.param(
45+
"16.10.13", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_YY.DD.MM_pass"
46+
),
47+
pytest.param(
48+
"16.14.10", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_YY.MM.DD_pass"
49+
),
50+
pytest.param(
51+
"10.10.2016",
52+
"2016-10-13",
53+
HeuristicResult.PASS,
54+
Versioning.CALENDAR.value,
55+
id="test_calendar_MM.DD.YYYY_pass",
56+
),
57+
pytest.param(
58+
"9.10.2016",
59+
"2016-10-13",
60+
HeuristicResult.PASS,
61+
Versioning.CALENDAR.value,
62+
id="test_calendar_DD.MM.YYYY_pass",
63+
),
64+
pytest.param(
65+
"10.15.16", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_DD.MM.YY_pass"
66+
),
67+
pytest.param(
68+
"16.10.16", "2016-10-13", HeuristicResult.PASS, Versioning.CALENDAR.value, id="test_calendar_MM.DD.YY_pass"
69+
),
70+
pytest.param(
71+
"2!16.10.17.2.5.3",
72+
"2016-10-13",
73+
HeuristicResult.PASS,
74+
Versioning.CALENDAR.value,
75+
id="test_calendar_epoch_pass",
76+
),
77+
pytest.param(
78+
"100!2016.10.14",
79+
"2016-10-13",
80+
HeuristicResult.FAIL,
81+
Versioning.CALENDAR.value,
82+
id="test_calendar_epoch_fail",
83+
),
84+
pytest.param(
85+
"2016.7.2",
86+
"2016-10-13",
87+
HeuristicResult.PASS,
88+
Versioning.CALENDAR_SEMANTIC.value,
89+
id="test_calendar_semantic_pass",
90+
),
91+
pytest.param(
92+
"2016.100.0",
93+
"2016-10-13",
94+
HeuristicResult.FAIL,
95+
Versioning.CALENDAR_SEMANTIC.value,
96+
id="test_calendar_semantic_fail",
3697
),
3798
pytest.param(
3899
"2!2016.1.5.6",
39100
"2016-10-13",
40101
HeuristicResult.PASS,
41-
"calendar_semantic",
102+
Versioning.CALENDAR_SEMANTIC.value,
42103
id="test_calendar_semantic_epoch_pass",
43104
),
44105
pytest.param(
45106
"100!2016.1",
46107
"2016-10-13",
47108
HeuristicResult.FAIL,
48-
"calendar_semantic",
109+
Versioning.CALENDAR_SEMANTIC.value,
49110
id="test_calendar_semantic_epoch_fail",
50111
),
51-
pytest.param("3.1", "2016-10-13", HeuristicResult.PASS, "semantic", id="test_semantic_pass"),
52-
pytest.param("999", "2016-10-13", HeuristicResult.FAIL, "semantic", id="test_semantic_fail"),
53-
pytest.param("3!0.1.9999", "2016-10-13", HeuristicResult.PASS, "semantic", id="test_semantic_epoch_pass"),
54-
pytest.param("999!0.0.0", "2016-10-13", HeuristicResult.FAIL, "semantic", id="test_semantic_epoch_fail"),
112+
pytest.param("3.1", "2016-10-13", HeuristicResult.PASS, Versioning.SEMANTIC.value, id="test_semantic_pass"),
113+
pytest.param("999", "2016-10-13", HeuristicResult.FAIL, Versioning.SEMANTIC.value, id="test_semantic_fail"),
114+
pytest.param(
115+
"3!0.1.9999", "2016-10-13", HeuristicResult.PASS, Versioning.SEMANTIC.value, id="test_semantic_epoch_pass"
116+
),
117+
pytest.param(
118+
"999!0.0.0", "2016-10-13", HeuristicResult.FAIL, Versioning.SEMANTIC.value, id="test_semantic_epoch_fail"
119+
),
55120
],
56121
)
57122
def test_analyze(
@@ -101,7 +166,7 @@ def test_analyze(
101166

102167
pypi_package_json.get_releases.return_value = release
103168
pypi_package_json.get_latest_version.return_value = version
104-
expected_result: tuple[HeuristicResult, dict] = (result, {"versioning": versioning})
169+
expected_result: tuple[HeuristicResult, dict] = (result, {AnomalisticVersionAnalyzer.DETAIL_INFO_KEY: versioning})
105170

106171
actual_result = analyzer.analyze(pypi_package_json)
107172

0 commit comments

Comments
 (0)