From a0dc2815c4f45162aed56bf2c5e7bfcdc8b0eaf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 26 Mar 2024 11:03:33 +0100 Subject: [PATCH 1/5] fix comment for creating lmod rc code block --- EESSI-install-software.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 31ce30d4fc..b2984af856 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -247,7 +247,7 @@ fi ### add packages here -echo ">> Creating/updating Lmod cache..." +echo ">> Creating/updating Lmod RC file..." export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod" lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua" lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) From b2570ed8cca9d7f87d3c506ec33d7b0c7b033671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 26 Mar 2024 11:03:50 +0100 Subject: [PATCH 2/5] don't update lmod cache here anymore --- EESSI-install-software.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index b2984af856..7b7a60686c 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -265,7 +265,5 @@ if [ ! -f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; the check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file" fi -$TOPDIR/update_lmod_cache.sh ${EPREFIX} ${EASYBUILD_INSTALLPATH} - echo ">> Cleaning up ${TMPDIR}..." rm -r ${TMPDIR} From cdf78d1b0d6914de0810ee0cd9ffc95cd4743e9b Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 26 Mar 2024 16:33:29 +0100 Subject: [PATCH 3/5] adding a bit more progress output to install_cuda_host_injections.sh --- scripts/gpu_support/nvidia/install_cuda_host_injections.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh index a9310d817a..ba4f7e8a4c 100755 --- a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -165,12 +165,14 @@ else error="${error}Please re-run this script with the 'eb' command available." fatal_error "${error}" fi + echo_yellow "EasyBuild module loaded" fi cuda_easyconfig="CUDA-${install_cuda_version}.eb" # Check the easyconfig file is available in the release # (eb search always returns 0, so we need a grep to ensure a usable exit code) + echo_yellow "Searching for easyconfig '${cuda_easyconfig}'" eb --search ^${cuda_easyconfig}|grep CUDA > /dev/null 2>&1 # Check the exit code if [ $? -ne 0 ]; then @@ -185,6 +187,7 @@ else error="${error}${available_cuda_easyconfigs}" fatal_error "${error}" fi + echo_yellow "Search for easyconfig '${cuda_easyconfig}' done" # We need the --rebuild option, as the CUDA module may or may not be on the # `MODULEPATH` yet. Even if it is, we still want to redo this installation @@ -195,10 +198,13 @@ else extra_args="--rebuild --installpath-modules=${tmpdir}" # We don't want hooks used in this install, we need a vanilla CUDA installation + echo_yellow "Creating empty hooks file" touch "$tmpdir"/none.py # shellcheck disable=SC2086 # Intended splitting of extra_args + echo_yellow "Running 'eb --prefix=$tmpdir ${extra_args} --accept-eula-for=CUDA --hooks=$tmpdir/none.py --installpath=${cuda_install_parent}/ ${cuda_easyconfig}'" eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" ret=$? + echo_yellow "eb command finished with exit code '$ret'" if [ $ret -ne 0 ]; then eb_last_log=$(unset EB_VERBOSE; eb --last-log) cp -a ${eb_last_log} . From 2bd1ada9f20a504d1e442e2ef8d1bb0fe9e8fd0c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 26 Mar 2024 19:16:26 +0100 Subject: [PATCH 4/5] added more debug messages --- scripts/gpu_support/nvidia/install_cuda_host_injections.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh index ba4f7e8a4c..8c81fdb31f 100755 --- a/scripts/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -155,6 +155,13 @@ else fi if ! command -v "eb" &>/dev/null; then + env | sort + module --version + declare -f module + module list + ls ${EESSI_SOFTWARE_PATH} + ls ${cuda_install_parent} + echo_yellow "Attempting to load an EasyBuild module to do actual install" module load EasyBuild # There are some scenarios where this may fail From db912759e627398bf5db772915f17c26c26ffac9 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 26 Mar 2024 20:18:03 +0100 Subject: [PATCH 5/5] use older apptainer (1.2.4) --- install_apptainer_ubuntu.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/install_apptainer_ubuntu.sh b/install_apptainer_ubuntu.sh index c35c34cda6..6cf9a6f48e 100755 --- a/install_apptainer_ubuntu.sh +++ b/install_apptainer_ubuntu.sh @@ -5,8 +5,11 @@ set -e # see https://github.com/apptainer/singularity/issues/5390#issuecomment-899111181 sudo apt-get install alien alien --version -apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') -curl -OL https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/${apptainer_rpm} +#apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') +#curl -OL https://dl.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/a/${apptainer_rpm} +#/pub/archive/epel/8.8/Everything/x86_64/Packages/a +apptainer_rpm=$(curl --silent -L https://dl.fedoraproject.org/pub/archive/epel/8.8/Everything/x86_64/Packages/a/ | grep 'apptainer-[0-9]' | sed 's/.*\(apptainer[0-9._a-z-]*.rpm\).*/\1/g') +curl -OL https://dl.fedoraproject.org/pub/archive/epel/8.8/Everything/x86_64/Packages/a/${apptainer_rpm} sudo alien -d ${apptainer_rpm} sudo apt install ./apptainer*.deb apptainer --version