From 2d831f6eb480568086d2a667925fb7d730098279 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 21:36:53 +0200 Subject: [PATCH 01/11] Add zstd support for faster tarball creation or extraction when --resume and/or --save is used and zstd is available --- eessi_container.sh | 102 +++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index c82d6b9936..c8920902e4 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -89,8 +89,6 @@ display_help() { echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" echo " attach a GPU, MODE==all for both [default: false]" - echo " -p | --pass-through ARG - argument to pass through to the launch of the" - echo " container; can be given multiple times [default: not set]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use; can be given multiple times;" echo " CFG may include a suffix ',access={ro,rw}' to" @@ -128,7 +126,6 @@ VERBOSE=0 STORAGE= LIST_REPOS=0 MODE="shell" -PASS_THROUGH=() SETUP_NVIDIA=0 REPOSITORIES=() RESUME= @@ -185,10 +182,6 @@ while [[ $# -gt 0 ]]; do NVIDIA_MODE="$2" shift 2 ;; - -p|--pass-through) - PASS_THROUGH+=("$2") - shift 2 - ;; -r|--repository) REPOSITORIES+=("$2") shift 2 @@ -370,47 +363,55 @@ fi # 2. set up host storage/tmp if necessary # if session to be resumed from a previous one (--resume ARG) and ARG is a directory # just reuse ARG, define environment variables accordingly and skip creating a new -# tmp storage +# eessi.XXXXXXXXXXX tempdir within TMPDIR + +# But before we call mktemp, we need to potentially set or create TMPDIR +# as location for temporary data use in the following order +# a. command line argument -l|--host-storage +# b. env var TMPDIR +# c. 
/tmp +# note, we ensure that (a) takes precedence by setting TMPDIR to STORAGE +# if STORAGE is not empty +# note, (b) & (c) are automatically ensured by using 'mktemp -d --tmpdir' to +# create a temporary directory +if [[ ! -z ${STORAGE} ]]; then + export TMPDIR=${STORAGE} + # mktemp fails if TMPDIR does not exist, so let's create it + mkdir -p ${TMPDIR} +fi +if [[ ! -z ${TMPDIR} ]]; then + # TODO check if TMPDIR already exists + # mktemp fails if TMPDIR does not exist, so let's create it + mkdir -p ${TMPDIR} +fi +if [[ -z ${TMPDIR} ]]; then + # mktemp falls back to using /tmp if TMPDIR is empty + # TODO check if /tmp is writable, large enough and usable (different + # features for ro-access and rw-access) + [[ ${VERBOSE} -eq 1 ]] && echo "skipping sanity checks for /tmp" +fi + +# Now, set the EESSI_HOST_STORAGE either baed on the resumed directory, or create a new one with mktemp if [[ ! -z ${RESUME} && -d ${RESUME} ]]; then # resume from directory ${RESUME} # skip creating a new tmp directory, just set environment variables echo "Resuming from previous run using temporary storage at ${RESUME}" EESSI_HOST_STORAGE=${RESUME} else - # we need a tmp location (and possibly init it with ${RESUME} if it was not - # a directory - - # as location for temporary data use in the following order - # a. command line argument -l|--host-storage - # b. env var TMPDIR - # c. /tmp - # note, we ensure that (a) takes precedence by setting TMPDIR to STORAGE - # if STORAGE is not empty - # note, (b) & (c) are automatically ensured by using 'mktemp -d --tmpdir' to - # create a temporary directory - if [[ ! -z ${STORAGE} ]]; then - export TMPDIR=${STORAGE} - # mktemp fails if TMPDIR does not exist, so let's create it - mkdir -p ${TMPDIR} - fi - if [[ ! 
-z ${TMPDIR} ]]; then - # TODO check if TMPDIR already exists - # mktemp fails if TMPDIR does not exist, so let's create it - mkdir -p ${TMPDIR} - fi - if [[ -z ${TMPDIR} ]]; then - # mktemp falls back to using /tmp if TMPDIR is empty - # TODO check if /tmp is writable, large enough and usable (different - # features for ro-access and rw-access) - [[ ${VERBOSE} -eq 1 ]] && echo "skipping sanity checks for /tmp" - fi EESSI_HOST_STORAGE=$(mktemp -d --tmpdir eessi.XXXXXXXXXX) echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi -# if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} +# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then - tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} + if [[ "${RESUME}" == *.tgz ]]; then + tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} + # Add support for resuming from zstd-compressed tarballs + elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then + zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} + elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then + fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" + fi echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" fi @@ -849,11 +850,6 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then export APPTAINERENV_EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE} fi -# add pass through arguments -for arg in "${PASS_THROUGH[@]}"; do - ADDITIONAL_CONTAINER_OPTIONS+=(${arg}) -done - echo "Launching container with command (next line):" echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" @@ -865,17 +861,31 @@ if [[ ! 
-z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used + + # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. + # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - TGZ=${SAVE}/tmp_storage-${ts}.tgz + if [[ -x "$(command -v zstd)" ]]; then + TARBALL=${SAVE}/tmp_storage-${ts}.zst + tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} + else + TARBALL=${SAVE}/tmp_storage-${ts}.tgz + tar czf ${TARBALL} -C ${EESSI_TMPDIR} . + fi else # assume SAVE is the full path to a tarball's name - TGZ=${SAVE} + TARBALL=${SAVE} + # if zstd is present and a .zst extension is asked for, use it + if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then + tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} + else + tar czf ${TARBALL} -C ${EESSI_TMPDIR} + fi fi - tar czf ${TGZ} -C ${EESSI_TMPDIR} . - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" fi # TODO clean up tmp by default? 
only retain if another option provided (--retain-tmp) From 901dc67c43d76077406f1f8eb40c9761d6fd00b8 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 21:40:57 +0200 Subject: [PATCH 02/11] Add SAMtools as a test for the build pipeline speed --- .../software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml index 18620a57e0..9c50eb671d 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml @@ -45,3 +45,4 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/22469 from-commit: fc22841fef99cbb2a221c18029b15e692e78c27c + - SAMtools-1.19.2-GCC-13.2.0.eb From 9cad884a457a8d1b40134501baa4cfdd260df579 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 21:46:30 +0200 Subject: [PATCH 03/11] Restore original eessi_container.sh --- eessi_container.sh | 102 ++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 56 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index c8920902e4..c82d6b9936 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -89,6 +89,8 @@ display_help() { echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," echo " MODE==install for a CUDA installation, MODE==run to" echo " attach a GPU, MODE==all for both [default: false]" + echo " -p | --pass-through ARG - argument to pass through to the launch of the" + echo " container; can be given multiple times [default: not set]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use; can be given multiple times;" echo " CFG may include a suffix ',access={ro,rw}' to" @@ -126,6 +128,7 @@ VERBOSE=0 STORAGE= LIST_REPOS=0 MODE="shell" 
+PASS_THROUGH=() SETUP_NVIDIA=0 REPOSITORIES=() RESUME= @@ -182,6 +185,10 @@ while [[ $# -gt 0 ]]; do NVIDIA_MODE="$2" shift 2 ;; + -p|--pass-through) + PASS_THROUGH+=("$2") + shift 2 + ;; -r|--repository) REPOSITORIES+=("$2") shift 2 @@ -363,55 +370,47 @@ fi # 2. set up host storage/tmp if necessary # if session to be resumed from a previous one (--resume ARG) and ARG is a directory # just reuse ARG, define environment variables accordingly and skip creating a new -# eessi.XXXXXXXXXXX tempdir within TMPDIR - -# But before we call mktemp, we need to potentially set or create TMPDIR -# as location for temporary data use in the following order -# a. command line argument -l|--host-storage -# b. env var TMPDIR -# c. /tmp -# note, we ensure that (a) takes precedence by setting TMPDIR to STORAGE -# if STORAGE is not empty -# note, (b) & (c) are automatically ensured by using 'mktemp -d --tmpdir' to -# create a temporary directory -if [[ ! -z ${STORAGE} ]]; then - export TMPDIR=${STORAGE} - # mktemp fails if TMPDIR does not exist, so let's create it - mkdir -p ${TMPDIR} -fi -if [[ ! -z ${TMPDIR} ]]; then - # TODO check if TMPDIR already exists - # mktemp fails if TMPDIR does not exist, so let's create it - mkdir -p ${TMPDIR} -fi -if [[ -z ${TMPDIR} ]]; then - # mktemp falls back to using /tmp if TMPDIR is empty - # TODO check if /tmp is writable, large enough and usable (different - # features for ro-access and rw-access) - [[ ${VERBOSE} -eq 1 ]] && echo "skipping sanity checks for /tmp" -fi - -# Now, set the EESSI_HOST_STORAGE either baed on the resumed directory, or create a new one with mktemp +# tmp storage if [[ ! 
-z ${RESUME} && -d ${RESUME} ]]; then # resume from directory ${RESUME} # skip creating a new tmp directory, just set environment variables echo "Resuming from previous run using temporary storage at ${RESUME}" EESSI_HOST_STORAGE=${RESUME} else + # we need a tmp location (and possibly init it with ${RESUME} if it was not + # a directory + + # as location for temporary data use in the following order + # a. command line argument -l|--host-storage + # b. env var TMPDIR + # c. /tmp + # note, we ensure that (a) takes precedence by setting TMPDIR to STORAGE + # if STORAGE is not empty + # note, (b) & (c) are automatically ensured by using 'mktemp -d --tmpdir' to + # create a temporary directory + if [[ ! -z ${STORAGE} ]]; then + export TMPDIR=${STORAGE} + # mktemp fails if TMPDIR does not exist, so let's create it + mkdir -p ${TMPDIR} + fi + if [[ ! -z ${TMPDIR} ]]; then + # TODO check if TMPDIR already exists + # mktemp fails if TMPDIR does not exist, so let's create it + mkdir -p ${TMPDIR} + fi + if [[ -z ${TMPDIR} ]]; then + # mktemp falls back to using /tmp if TMPDIR is empty + # TODO check if /tmp is writable, large enough and usable (different + # features for ro-access and rw-access) + [[ ${VERBOSE} -eq 1 ]] && echo "skipping sanity checks for /tmp" + fi EESSI_HOST_STORAGE=$(mktemp -d --tmpdir eessi.XXXXXXXXXX) echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi -# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} +# if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then - if [[ "${RESUME}" == *.tgz ]]; then - tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} - # Add support for resuming from zstd-compressed tarballs - elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then - zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} - elif [[ "${RESUME}" == *.zst && ! 
-x "$(command -v zstd)" ]]; then - fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" - fi + tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" fi @@ -850,6 +849,11 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then export APPTAINERENV_EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE} fi +# add pass through arguments +for arg in "${PASS_THROUGH[@]}"; do + ADDITIONAL_CONTAINER_OPTIONS+=(${arg}) +done + echo "Launching container with command (next line):" echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" @@ -861,31 +865,17 @@ if [[ ! -z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used - - # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. - # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - if [[ -x "$(command -v zstd)" ]]; then - TARBALL=${SAVE}/tmp_storage-${ts}.zst - tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} - else - TARBALL=${SAVE}/tmp_storage-${ts}.tgz - tar czf ${TARBALL} -C ${EESSI_TMPDIR} . - fi + TGZ=${SAVE}/tmp_storage-${ts}.tgz else # assume SAVE is the full path to a tarball's name - TARBALL=${SAVE} - # if zstd is present and a .zst extension is asked for, use it - if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then - tar -cf - -C ${EESSI_TMPDIR} . 
| zstd -T0 > ${TARBALL} - else - tar czf ${TARBALL} -C ${EESSI_TMPDIR} - fi + TGZ=${SAVE} fi - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" + tar czf ${TGZ} -C ${EESSI_TMPDIR} . + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From 96bb45ff9bf1b4d010d497a87ccadf08bf879f2d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 21:56:45 +0200 Subject: [PATCH 04/11] Fix typo --- test_suite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_suite.sh b/test_suite.sh index 4121a37c2e..f5f3255841 100755 --- a/test_suite.sh +++ b/test_suite.sh @@ -203,7 +203,7 @@ else fatal_error "Failed to extract names of tests to run: ${REFRAME_NAME_ARGS}" exit ${test_selection_exit_code} fi -# Allow people deploying the bot to overrwide this +# Allow people deploying the bot to override this if [ -z "$REFRAME_SCALE_TAG" ]; then REFRAME_SCALE_TAG="--tag 1_node" fi From 6e7b2527a966ae1eade0244897a318e8676bfb81 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 21:56:58 +0200 Subject: [PATCH 05/11] Implement zstd support --- eessi_container.sh | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index c82d6b9936..d6b7065d3f 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -408,6 +408,19 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi +# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} +if [[ ! 
-z ${RESUME} && -f ${RESUME} ]]; then + if [[ "${RESUME}" == *.tgz ]]; then + tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} + # Add support for resuming from zstd-compressed tarballs + elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then + zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} + elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then + fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" + fi + echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" +fi + # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -865,17 +878,30 @@ if [[ ! -z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used + # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. + # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - TGZ=${SAVE}/tmp_storage-${ts}.tgz + if [[ -x "$(command -v zstd)" ]]; then + TARBALL=${SAVE}/tmp_storage-${ts}.zst + tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} + else + TARBALL=${SAVE}/tmp_storage-${ts}.tgz + tar czf ${TARBALL} -C ${EESSI_TMPDIR} . + fi else # assume SAVE is the full path to a tarball's name - TGZ=${SAVE} + TARBALL=${SAVE} + # if zstd is present and a .zst extension is asked for, use it + if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then + tar -cf - -C ${EESSI_TMPDIR} . 
| zstd -T0 > ${TARBALL} + else + tar czf ${TARBALL} -C ${EESSI_TMPDIR} + fi fi - tar czf ${TGZ} -C ${EESSI_TMPDIR} . - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From 1b408d3d643301c7052766e9492f52317180324d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 22:11:21 +0200 Subject: [PATCH 06/11] restore original container, to show performance --- eessi_container.sh | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index d6b7065d3f..c82d6b9936 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -408,19 +408,6 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi -# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} -if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then - if [[ "${RESUME}" == *.tgz ]]; then - tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} - # Add support for resuming from zstd-compressed tarballs - elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then - zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} - elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then - fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" - fi - echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" -fi - # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -878,30 +865,17 @@ if [[ ! 
-z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used - # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. - # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - if [[ -x "$(command -v zstd)" ]]; then - TARBALL=${SAVE}/tmp_storage-${ts}.zst - tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} - else - TARBALL=${SAVE}/tmp_storage-${ts}.tgz - tar czf ${TARBALL} -C ${EESSI_TMPDIR} . - fi + TGZ=${SAVE}/tmp_storage-${ts}.tgz else # assume SAVE is the full path to a tarball's name - TARBALL=${SAVE} - # if zstd is present and a .zst extension is asked for, use it - if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then - tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} - else - tar czf ${TARBALL} -C ${EESSI_TMPDIR} - fi + TGZ=${SAVE} fi - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" + tar czf ${TGZ} -C ${EESSI_TMPDIR} . + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From fe3c227ef4f94eb92a348a5432fce452fb549b47 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 22:28:49 +0200 Subject: [PATCH 07/11] Revert "restore original container, to show performance" This reverts commit 1b408d3d643301c7052766e9492f52317180324d. 
--- eessi_container.sh | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index c82d6b9936..d6b7065d3f 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -408,6 +408,19 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi +# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} +if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then + if [[ "${RESUME}" == *.tgz ]]; then + tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} + # Add support for resuming from zstd-compressed tarballs + elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then + zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} + elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then + fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" + fi + echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" +fi + # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -865,17 +878,30 @@ if [[ ! -z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used + # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. + # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - TGZ=${SAVE}/tmp_storage-${ts}.tgz + if [[ -x "$(command -v zstd)" ]]; then + TARBALL=${SAVE}/tmp_storage-${ts}.zst + tar -cf - -C ${EESSI_TMPDIR} . 
| zstd -T0 > ${TARBALL} + else + TARBALL=${SAVE}/tmp_storage-${ts}.tgz + tar czf ${TARBALL} -C ${EESSI_TMPDIR} . + fi else # assume SAVE is the full path to a tarball's name - TGZ=${SAVE} + TARBALL=${SAVE} + # if zstd is present and a .zst extension is asked for, use it + if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then + tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} + else + tar czf ${TARBALL} -C ${EESSI_TMPDIR} + fi fi - tar czf ${TGZ} -C ${EESSI_TMPDIR} . - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From 8d706deea45f71469fe9e84066757e91631d173d Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 22:31:47 +0200 Subject: [PATCH 08/11] Remove samtools, add pmt --- .../accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml | 1 + .../software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml index 60d82d46ad..fbb9203fd2 100644 --- a/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -2,3 +2,4 @@ easyconfigs: - CUDA-12.1.1.eb: options: accept-eula-for: CUDA + - pmt-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml index 9c50eb671d..18620a57e0 100644 --- 
a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023b.yml @@ -45,4 +45,3 @@ easyconfigs: options: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/22469 from-commit: fc22841fef99cbb2a221c18029b15e692e78c27c - - SAMtools-1.19.2-GCC-13.2.0.eb From 17cff9176181781c33a325d364172b585d6b438c Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 22:32:38 +0200 Subject: [PATCH 09/11] Restore original container to show original performance --- eessi_container.sh | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index d6b7065d3f..c82d6b9936 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -408,19 +408,6 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi -# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} -if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then - if [[ "${RESUME}" == *.tgz ]]; then - tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} - # Add support for resuming from zstd-compressed tarballs - elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then - zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} - elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then - fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" - fi - echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" -fi - # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -878,30 +865,17 @@ if [[ ! -z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... 
we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used - # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. - # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - if [[ -x "$(command -v zstd)" ]]; then - TARBALL=${SAVE}/tmp_storage-${ts}.zst - tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} - else - TARBALL=${SAVE}/tmp_storage-${ts}.tgz - tar czf ${TARBALL} -C ${EESSI_TMPDIR} . - fi + TGZ=${SAVE}/tmp_storage-${ts}.tgz else # assume SAVE is the full path to a tarball's name - TARBALL=${SAVE} - # if zstd is present and a .zst extension is asked for, use it - if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then - tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} - else - tar czf ${TARBALL} -C ${EESSI_TMPDIR} - fi + TGZ=${SAVE} fi - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" + tar czf ${TGZ} -C ${EESSI_TMPDIR} . + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From cffa653d725b81ab227d7c7a4fb30999645ce1fe Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 22:59:37 +0200 Subject: [PATCH 10/11] Revert "Restore original container to show original performance" This reverts commit 17cff9176181781c33a325d364172b585d6b438c. 
--- eessi_container.sh | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/eessi_container.sh b/eessi_container.sh index c82d6b9936..d6b7065d3f 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -408,6 +408,19 @@ else echo "Using ${EESSI_HOST_STORAGE} as tmp directory (to resume session add '--resume ${EESSI_HOST_STORAGE}')." fi +# if ${RESUME} is a file, unpack it into ${EESSI_HOST_STORAGE} +if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then + if [[ "${RESUME}" == *.tgz ]]; then + tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} + # Add support for resuming from zstd-compressed tarballs + elif [[ "${RESUME}" == *.zst && -x "$(command -v zstd)" ]]; then + zstd -dc ${RESUME} | tar -xf - -C ${EESSI_HOST_STORAGE} + elif [[ "${RESUME}" == *.zst && ! -x "$(command -v zstd)" ]]; then + fatal_error "Trying to resume from tarball ${RESUME} which was compressed using zstd, but zstd command not found" + fi + echo "Resuming from previous run using temporary storage ${RESUME} unpacked into ${EESSI_HOST_STORAGE}" +fi + # if ${RESUME} is a file (assume a tgz), unpack it into ${EESSI_HOST_STORAGE} if [[ ! -z ${RESUME} && -f ${RESUME} ]]; then tar xf ${RESUME} -C ${EESSI_HOST_STORAGE} @@ -865,17 +878,30 @@ if [[ ! -z ${SAVE} ]]; then # ARCH which might have been used internally, eg, when software packages # were built ... we rather keep the script here "stupid" and leave the handling # of these aspects to where the script is used + # Compression with zlib may be quite slow. On some systems, the pipeline takes ~20 mins for a 2 min build because of this. + # Check if zstd is present for faster compression and decompression if [[ -d ${SAVE} ]]; then # assume SAVE is name of a directory to which tarball shall be written to # name format: tmp_storage-{TIMESTAMP}.tgz ts=$(date +%s) - TGZ=${SAVE}/tmp_storage-${ts}.tgz + if [[ -x "$(command -v zstd)" ]]; then + TARBALL=${SAVE}/tmp_storage-${ts}.zst + tar -cf - -C ${EESSI_TMPDIR} . 
| zstd -T0 > ${TARBALL} + else + TARBALL=${SAVE}/tmp_storage-${ts}.tgz + tar czf ${TARBALL} -C ${EESSI_TMPDIR} . + fi else # assume SAVE is the full path to a tarball's name - TGZ=${SAVE} + TARBALL=${SAVE} + # if zstd is present and a .zst extension is asked for, use it + if [[ "${SAVE}" == *.zst && -x "$(command -v zstd)" ]]; then + tar -cf - -C ${EESSI_TMPDIR} . | zstd -T0 > ${TARBALL} + else + tar czf ${TARBALL} -C ${EESSI_TMPDIR} + fi fi - tar czf ${TGZ} -C ${EESSI_TMPDIR} . - echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TGZ}' (to resume session add '--resume ${TGZ}')" + echo "Saved contents of tmp directory '${EESSI_TMPDIR}' to tarball '${TARBALL}' (to resume session add '--resume ${TARBALL}')" fi # TODO clean up tmp by default? only retain if another option provided (--retain-tmp) From 1a419872e53c976db5c484bedce9b741b7313f0a Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Wed, 2 Apr 2025 23:06:27 +0200 Subject: [PATCH 11/11] Remove pmt again, since that was only to demo the speedup of the build pipeline --- .../accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml index fbb9203fd2..60d82d46ad 100644 --- a/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/zen4_h100/eessi-2023.06-eb-4.9.4-2023a-CUDA.yml @@ -2,4 +2,3 @@ easyconfigs: - CUDA-12.1.1.eb: options: accept-eula-for: CUDA - - pmt-1.2.0-GCCcore-12.3.0-CUDA-12.1.1.eb