Skip to content

Commit 8132905

Browse files
committed
fix limiting parallelism based on available cores in post-ready hook
1 parent 99c82b5 commit 8132905

File tree

1 file changed

+24
-14
lines changed

1 file changed

+24
-14
lines changed

eb_hooks.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from easybuild.tools.config import build_option, install_path, update_build_option
1717
from easybuild.tools.filetools import apply_regex_substitutions, copy_dir, copy_file, remove_file, symlink, which
1818
from easybuild.tools.run import run_cmd
19-
from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_cpu_features
19+
from easybuild.tools.systemtools import AARCH64, POWER, X86_64, det_parallelism, get_cpu_architecture, get_cpu_features
2020
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC
2121
from easybuild.tools.toolchain.toolchain import is_system_toolchain
2222
from easybuild.tools.version import VERSION as EASYBUILD_VERSION
@@ -205,20 +205,29 @@ def post_ready_hook(self, *args, **kwargs):
205205
Post-ready hook: limit parallelism for selected builds based on software name and CPU target.
206206
parallelism needs to be limited because some builds require a lot of memory per used core.
207207
"""
208-
# 'parallel' easyconfig parameter (EB4) or the parallel property (EB5) is set via EasyBlock.set_parallel
209-
# in ready step based on available cores
208+
if self.iter_idx > 0:
209+
# only tweak level of parallelism in 1st iteration, not subsequent ones
210+
self.log.info(f"Not limiting parallellism again in iteration #{self.iter_idx+1}")
211+
return
212+
213+
# 'parallel' easyconfig parameter (EasyBuild 4.x) or the parallel property (EasyBuild 5.x)
214+
# is set via EasyBlock.set_parallel in ready step based on available cores
215+
# (and --max-parallel EasyBuild configuration option in EasyBuild 5.x)
210216
if hasattr(self, 'parallel'):
211-
parallel = self.parallel
217+
curr_parallel = self.parallel
212218
else:
213-
parallel = self.cfg['parallel']
219+
curr_parallel = self.cfg['parallel']
214220

215-
if parallel == 1:
221+
if curr_parallel == 1:
216222
return # no need to limit if already using 1 core
217223

218224
# get CPU target
219225
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
220226

221-
new_parallel = parallel
227+
# derive level of parallelism from available cores and ulimit settings in current session
228+
session_parallel = det_parallelism()
229+
230+
new_parallel = None
222231

223232
# check if we have limits defined for this software
224233
if self.name in PARALLELISM_LIMITS:
@@ -227,27 +236,28 @@ def post_ready_hook(self, *args, **kwargs):
227236
# first check for CPU-specific limit
228237
if cpu_target in limits:
229238
operation_func, operation_args = limits[cpu_target]
230-
new_parallel = operation_func(parallel, operation_args)
239+
new_parallel = operation_func(session_parallel, operation_args)
231240
# then check for generic limit (applies to all CPU targets)
232241
elif '*' in limits:
233242
operation_func, operation_args = limits['*']
234-
new_parallel = operation_func(parallel, operation_args)
243+
new_parallel = operation_func(session_parallel, operation_args)
235244
else:
236245
return # no applicable limits found
237246

238247
# check if there's a general limit set for CPU target
239248
elif cpu_target in PARALLELISM_LIMITS:
240249
operation_func, operation_args = PARALLELISM_LIMITS[cpu_target]
241-
new_parallel = operation_func(parallel, operation_args)
250+
new_parallel = operation_func(session_parallel, operation_args)
242251

243-
# apply the limit if it's different from current
244-
if new_parallel != parallel:
252+
# apply the limit if it's lower than current
253+
if new_parallel is not None and new_parallel < curr_parallel:
245254
if hasattr(self, 'parallel'):
246255
self.cfg.parallel = new_parallel
247256
else:
248257
self.cfg['parallel'] = new_parallel
249-
msg = "limiting parallelism to %s (was %s) for %s on %s to avoid out-of-memory failures during building/testing"
250-
print_msg(msg % (new_parallel, parallel, self.name, cpu_target), log=self.log)
258+
msg = "limiting parallelism to %s (was %s, derived parallelism %s) for %s on %s "
259+
# append the rationale to the message template; a bare `msg + "..."` expression would be evaluated and discarded
msg += "to avoid out-of-memory failures during building/testing"
260+
print_msg(msg % (new_parallel, curr_parallel, session_parallel, self.name, cpu_target), log=self.log)
251261

252262

253263
def pre_prepare_hook(self, *args, **kwargs):

0 commit comments

Comments
 (0)