Skip to content

Commit c035dea

Browse files
committed
Fix to_checksums with None values in dicts and recursion
Having a `'src': None` entry in a dict for checksums is as valid as having a `None` entry directly in the list. However the current function didn't handle it and crashed. Fix that as well as a few corner cases especially in the recursive case by introducing a new function for handling checksum entries in the checksum list and limiting the recursiveness. Fixes #4142
1 parent 597dca8 commit c035dea

File tree

2 files changed

+135
-38
lines changed

2 files changed

+135
-38
lines changed

easybuild/framework/easyconfig/types.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -469,33 +469,51 @@ def to_dependencies(dep_list):
469469
return [to_dependency(dep) for dep in dep_list]
470470

471471

472-
def to_checksums(checksums):
473-
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
474-
res = []
475-
for checksum in checksums:
476-
# each list entry can be:
477-
# * None (indicates no checksum)
478-
# * a string (MD5 or SHA256 checksum)
479-
# * a tuple with 2 elements: checksum type + checksum value
480-
# * a list of checksums (i.e. multiple checksums for a single file)
481-
# * a dict (filename to checksum mapping)
482-
if isinstance(checksum, string_type):
483-
res.append(checksum)
484-
elif isinstance(checksum, (list, tuple)):
485-
# 2 elements + only string/int values => a checksum tuple
486-
if len(checksum) == 2 and all(isinstance(x, (string_type, int)) for x in checksum):
487-
res.append(tuple(checksum))
472+
def _to_checksum(checksum, list_level=0, allow_dict=True):
473+
"""Ensure the correct element type for each checksum in the checksum list"""
474+
# each entry can be:
475+
# * None (indicates no checksum)
476+
# * a string (MD5, SHA256, ... checksum)
477+
# * a list or tuple with 2 elements: checksum type + checksum value
478+
# * a list or tuple of checksums (i.e. multiple checksums for a single file)
479+
# * a dict (filename to checksum mapping)
480+
if checksum is None or isinstance(checksum, string_type):
481+
return checksum
482+
elif isinstance(checksum, (list, tuple)):
483+
if len(checksum) == 2 and isinstance(checksum[0], string_type) and isinstance(checksum[1], (string_type, int)):
484+
# 2 elements so either:
485+
# - a checksum tuple (2nd element string or int)
486+
# - 2 alternative checksums (tuple)
487+
# - 2 checksums that must each match (list)
488+
# --> Convert to tuple only if we can exclude the 3rd case
489+
if not isinstance(checksum[1], string_type) or list_level > 0:
490+
return tuple(checksum)
488491
else:
489-
res.append(to_checksums(checksum))
490-
elif isinstance(checksum, dict):
491-
validated_dict = {}
492-
for key, value in checksum.items():
493-
validated_dict[key] = to_checksums(value)
494-
res.append(validated_dict)
495-
else:
496-
res.append(checksum)
492+
return checksum
493+
elif list_level < 2:
494+
# Alternative checksums or multiple checksums for a single file
495+
# Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]]
496+
# None is not allowed here
497+
if any(x is None for x in checksum):
498+
raise ValueError('Unexpected None in ' + str(checksum))
499+
if isinstance(checksum, tuple) or list_level > 0:
500+
# When we already are in a tuple no further recursion is allowed -> set list_level very high
501+
return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum)
502+
else:
503+
return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum)
504+
elif isinstance(checksum, dict) and allow_dict:
505+
return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()}
497506

498-
return res
507+
# Not returned -> Wrong type/format
508+
raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum)))
509+
510+
511+
def to_checksums(checksums):
512+
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
513+
try:
514+
return [_to_checksum(checksum) for checksum in checksums]
515+
except ValueError as e:
516+
raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e)
499517

500518

501519
def ensure_iterable_license_specs(specs):

test/framework/type_checking.py

Lines changed: 92 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,16 +172,16 @@ def test_check_type_of_param_value_sanity_check_paths(self):
172172
out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]}
173173
self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out))
174174

175-
def test_check_type_of_param_value_checksums(self):
176-
"""Test check_type_of_param_value function for checksums."""
175+
@staticmethod
176+
def get_valid_checksums_values():
177+
"""Return list of values valid for the 'checksums' EC parameter"""
177178

178179
md5_checksum = 'fa618be8435447a017fd1bf2c7ae9224'
179180
sha256_checksum1 = 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265'
180181
sha256_checksum2 = 'b5f9cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551'
181182
sha256_checksum3 = '033be54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde'
182-
183183
# valid values for 'checksums' easyconfig parameters
184-
inputs = [
184+
return [
185185
[],
186186
# single checksum (one file)
187187
[md5_checksum],
@@ -237,7 +237,11 @@ def test_check_type_of_param_value_checksums(self):
237237
{'foo.txt': sha256_checksum1, 'bar.txt': None},
238238
],
239239
]
240-
for inp in inputs:
240+
241+
def test_check_type_of_param_value_checksums(self):
242+
"""Test check_type_of_param_value function for checksums."""
243+
244+
for inp in TypeCheckingTest.get_valid_checksums_values():
241245
self.assertTrue(check_type_of_param_value('checksums', inp), 'Failed for ' + str(inp))
242246

243247
def test_check_type_of_param_value_patches(self):
@@ -720,19 +724,94 @@ def test_to_sanity_check_paths_dict(self):
720724

721725
def test_to_checksums(self):
722726
"""Test to_checksums function."""
727+
# Some hand-crafted examples. Only the types are important, values are for easier verification
723728
test_inputs = [
724-
['be662daa971a640e40be5c804d9d7d10'],
725-
['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')],
726-
[['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]],
727-
[('md5', 'be662daa971a640e40be5c804d9d7d10')],
728-
['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'),
729-
('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
730-
('size', 273)],
729+
['checksumvalue'],
730+
[('md5', 'md5checksumvalue')],
731+
['file_1_checksum', ('md5', 'file_2_md5_checksum')],
732+
# One checksum per file, some with checksum type
733+
[
734+
'be662daa971a640e40be5c804d9d7d10',
735+
('adler32', '0x998410035'),
736+
('crc32', '0x1553842328'),
737+
('md5', 'be662daa971a640e40be5c804d9d7d10'),
738+
('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
739+
# int type as the 2nd value
740+
('size', 273),
741+
],
731742
# None values should not be filtered out, but left in place
732-
[None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None],
743+
[None, 'checksum', None],
744+
# Alternative checksums, not to be confused with multiple checksums for a file
745+
[('main_checksum', 'alternative_checksum')],
746+
[('1st_of_3', '2nd_of_3', '3rd_of_3')],
747+
# Lists must be kept: This means all must match
748+
[['checksum_1_in_list']],
749+
[['checksum_must_match', 'this_must_also_match']],
750+
[['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']],
751+
# Alternative checksums with types
752+
[
753+
(('adler32', '1st_adler'), ('crc32', '1st_crc')),
754+
(('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')),
755+
],
756+
# Entries can be dicts even containing `None`
757+
[
758+
{
759+
'src-arm.tgz': 'arm_checksum',
760+
'src-x86.tgz': ('mainchecksum', 'altchecksum'),
761+
'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')),
762+
'git-clone.tgz': None,
763+
},
764+
{
765+
'src': ['checksum_must_match', 'this_must_also_match']
766+
},
767+
# 2nd required checksum a dict
768+
['first_checksum', {'src-arm': 'arm_checksum'}]
769+
],
733770
]
734771
for checksums in test_inputs:
735772
self.assertEqual(to_checksums(checksums), checksums)
773+
# Also reuse the checksums we use in test_check_type_of_param_value_checksums
774+
# When a checksum is valid it must not be modified
775+
for checksums in TypeCheckingTest.get_valid_checksums_values():
776+
self.assertEqual(to_checksums(checksums), checksums)
777+
778+
# List in list converted to tuple -> alternatives or checksum with type
779+
checksums = [['1stchecksum', ['md5', 'md5sum']]]
780+
checksums_expected = [['1stchecksum', ('md5', 'md5sum')]]
781+
self.assertEqual(to_checksums(checksums), checksums_expected)
782+
783+
# Error detection
784+
wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))]
785+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting)
786+
correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]
787+
self.assertEqual(to_checksums(correct_nesting), correct_nesting)
788+
# YEB (YAML EC) doesn't has tuples so it uses lists instead which need to get converted
789+
correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]]
790+
correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]]
791+
self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv)
792+
self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv)
793+
794+
unexpected_set = [('1stchecksum', {'md5', 'md5sum'})]
795+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set)
796+
unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}]
797+
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict)
798+
correct_dict = [{'src': ('md5sum', 'shasum')}]
799+
self.assertEqual(to_checksums(correct_dict), correct_dict)
800+
correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}]
801+
correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}]
802+
self.assertEqual(to_checksums(correct_dict_2), correct_dict_2)
803+
self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples
804+
805+
unexpected_Nones = [
806+
[('1stchecksum', None)],
807+
[['1stchecksum', None]],
808+
[{'src': ('md5sum', None)}],
809+
[{'src': ['md5sum', None]}],
810+
]
811+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0])
812+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1])
813+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2])
814+
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3])
736815

737816
def test_ensure_iterable_license_specs(self):
738817
"""Test ensure_iterable_license_specs function."""

0 commit comments

Comments
 (0)