diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh
index 75b4f71178..31ce30d4fc 100755
--- a/EESSI-install-software.sh
+++ b/EESSI-install-software.sh
@@ -248,11 +248,21 @@ fi
 ### add packages here
 
 echo ">> Creating/updating Lmod cache..."
-export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
+export LMOD_CONFIG_DIR="${EASYBUILD_INSTALLPATH}/.lmod"
+lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua"
 lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
-if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then
+if [ ! -f "$lmod_rc_file" ] || [ "${lmodrc_changed}" == '0' ]; then
     python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH}
-    check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
+    check_exit_code $? "$lmod_rc_file created" "Failed to create $lmod_rc_file"
+fi
+
+echo ">> Creating/updating Lmod SitePackage.lua ..."
+export LMOD_PACKAGE_PATH="${EASYBUILD_INSTALLPATH}/.lmod"
+lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua"
+sitepackage_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodsitepackage.py$' > /dev/null; echo $?)
+if [ ! -f "$lmod_sitepackage_file" ] || [ "${sitepackage_changed}" == '0' ]; then
+    python3 $TOPDIR/create_lmodsitepackage.py ${EASYBUILD_INSTALLPATH}
+    check_exit_code $? "$lmod_sitepackage_file created" "Failed to create $lmod_sitepackage_file"
 fi
 
 $TOPDIR/update_lmod_cache.sh ${EPREFIX} ${EASYBUILD_INSTALLPATH}
diff --git a/create_lmodrc.py b/create_lmodrc.py
index 621c8e271a..ae65153a20 100755
--- a/create_lmodrc.py
+++ b/create_lmodrc.py
@@ -17,113 +17,6 @@
 }
 """
 
-GPU_LMOD_RC ="""require("strict")
-local hook = require("Hook")
-local open = io.open
-
-local function read_file(path)
-    local file = open(path, "rb") -- r read mode and b binary mode
-    if not file then return nil end
-    local content = file:read "*a" -- *a or *all reads the whole file
-    file:close()
-    return content
-end
-
-local function eessi_cuda_enabled_load_hook(t)
-    local frameStk = require("FrameStk"):singleton()
-    local mt = frameStk:mt()
-    local simpleName = string.match(t.modFullName, "(.-)/")
-    -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
-    -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
-    -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
-    local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
-    if simpleName == 'CUDA' then
-        -- get the full host_injections path
-        local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
-        -- build final path where the CUDA software should be installed
-        local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
-        local cudaDirExists = isDir(cudaEasyBuildDir)
-        if not cudaDirExists then
-            local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
-            advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
-            advice = advice .. "can find it.\\n"
-            advice = advice .. refer_to_docs
-            LmodError("\\nYou requested to load ", simpleName, " ", advice)
-        end
-    end
-    -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
-    -- otherwise, refuse to load the requested module and print error message
-    local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
-    if haveGpu then
-        local arch = os.getenv("EESSI_CPU_FAMILY") or ""
-        local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
-        local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
-        local cudaDriverExists = isFile(cudaDriverFile)
-        local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
-        if not (cudaDriverExists or singularityCudaExists) then
-            local advice = "which relies on the CUDA runtime environment and driver libraries. "
-            advice = advice .. "In order to be able to use the module, you will need "
-            advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
-            advice = advice .. refer_to_docs
-            LmodError("\\nYou requested to load ", simpleName, " ", advice)
-        else
-            -- CUDA driver exists, now we check its version to see if an update is needed
-            if cudaDriverExists then
-                local cudaVersion = read_file(cudaVersionFile)
-                local cudaVersion_req = os.getenv("EESSICUDAVERSION")
-                -- driver CUDA versions don't give a patch version for CUDA
-                local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
-                local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
-                local driver_libs_need_update = false
-                if major < major_req then
-                    driver_libs_need_update = true
-                elseif major == major_req then
-                    if minor < minor_req then
-                        driver_libs_need_update = true
-                    end
-                end
-                if driver_libs_need_update == true then
-                    local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
-                    advice = advice .. "Please update your CUDA driver libraries and then "
-                    advice = advice .. "let EESSI know about the update.\\n"
-                    advice = advice .. refer_to_docs
-                    LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
-                end
-            end
-        end
-    end
-end
-
-local function eessi_openmpi_load_hook(t)
-    -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1,
-    -- to work around hang/crash due to bug in OpenMPI;
-    -- see https://gitlab.com/eessi/support/-/issues/41
-    local frameStk = require("FrameStk"):singleton()
-    local mt = frameStk:mt()
-    local moduleName = string.match(t.modFullName, "(.-)/")
-    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
-    if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then
-        local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
-        LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
-        local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
-        if ompiMcaBtl == nil then
-            setenv("OMPI_MCA_btl", "^smcuda")
-        else
-            setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda")
-        end
-    end
-end
-
--- Combine both functions into a single one, as we can only register one function as load hook in lmod
--- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
-function eessi_load_hook(t)
-    eessi_cuda_enabled_load_hook(t)
-    eessi_openmpi_load_hook(t)
-end
-
-
-hook.register("load", eessi_load_hook)
-"""
 
 def error(msg):
     sys.stderr.write("ERROR: %s\n" % msg)
@@ -143,7 +36,6 @@ def error(msg):
     'dot_lmod': DOT_LMOD,
     'prefix': prefix,
 }
-lmodrc_txt += '\n' + GPU_LMOD_RC
 try:
     os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True)
     with open(lmodrc_path, 'w') as fp:
diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py
new file mode 100755
index 0000000000..9a4a232863
--- /dev/null
+++ b/create_lmodsitepackage.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+#
+# Create SitePackage.lua configuration file for Lmod.
+#
+import os
+import sys
+
+DOT_LMOD = '.lmod'
+
+hook_txt ="""require("strict")
+local hook = require("Hook")
+local open = io.open
+
+local function read_file(path)
+    local file = open(path, "rb") -- r read mode and b binary mode
+    if not file then return nil end
+    local content = file:read "*a" -- *a or *all reads the whole file
+    file:close()
+    return content
+end
+
+local function eessi_cuda_enabled_load_hook(t)
+    local frameStk = require("FrameStk"):singleton()
+    local mt = frameStk:mt()
+    local simpleName = string.match(t.modFullName, "(.-)/")
+    -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
+    -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
+    -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
+    local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
+    if simpleName == 'CUDA' then
+        -- get the full host_injections path
+        local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
+        -- build final path where the CUDA software should be installed
+        local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
+        local cudaDirExists = isDir(cudaEasyBuildDir)
+        if not cudaDirExists then
+            local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
+            advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
+            advice = advice .. "can find it.\\n"
+            advice = advice .. refer_to_docs
+            LmodError("\\nYou requested to load ", simpleName, " ", advice)
+        end
+    end
+    -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
+    -- otherwise, refuse to load the requested module and print error message
+    local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
+    if haveGpu then
+        local arch = os.getenv("EESSI_CPU_FAMILY") or ""
+        local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
+        local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
+        local cudaDriverExists = isFile(cudaDriverFile)
+        local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
+        if not (cudaDriverExists or singularityCudaExists) then
+            local advice = "which relies on the CUDA runtime environment and driver libraries. "
+            advice = advice .. "In order to be able to use the module, you will need "
+            advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
+            advice = advice .. refer_to_docs
+            LmodError("\\nYou requested to load ", simpleName, " ", advice)
+        else
+            -- CUDA driver exists, now we check its version to see if an update is needed
+            if cudaDriverExists then
+                local cudaVersion = read_file(cudaVersionFile) or "" -- read_file returns nil if the file cannot be opened
+                local cudaVersion_req = os.getenv("EESSICUDAVERSION") or "" -- variable may be unset
+                -- driver CUDA versions don't give a patch version for CUDA
+                local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
+                local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
+                major, minor = tonumber(major) or 0, tonumber(minor) or 0 -- captures are strings: compare as numbers
+                major_req, minor_req = tonumber(major_req) or 0, tonumber(minor_req) or 0 -- unparseable counts as 0
+                local driver_libs_need_update = false
+                if major < major_req then
+                    driver_libs_need_update = true
+                elseif major == major_req and minor < minor_req then
+                    driver_libs_need_update = true
+                end
+                if driver_libs_need_update == true then
+                    local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
+                    advice = advice .. "Please update your CUDA driver libraries and then "
+                    advice = advice .. "let EESSI know about the update.\\n"
+                    advice = advice .. refer_to_docs
+                    LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
+                end
+            end
+        end
+    end
+end
+
+local function eessi_openmpi_load_hook(t)
+    -- disable smcuda BTL when loading OpenMPI module for aarch64/neoverse_v1,
+    -- to work around hang/crash due to bug in OpenMPI;
+    -- see https://gitlab.com/eessi/support/-/issues/41
+    local frameStk = require("FrameStk"):singleton()
+    local mt = frameStk:mt()
+    local moduleName = string.match(t.modFullName, "(.-)/")
+    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
+    if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then
+        local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
+        LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
+        local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
+        if ompiMcaBtl == nil then
+            setenv("OMPI_MCA_btl", "^smcuda")
+        else
+            setenv("OMPI_MCA_btl", ompiMcaBtl .. ",^smcuda")
+        end
+    end
+end
+
+-- Combine both functions into a single one, as we can only register one function as load hook in lmod
+-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
+function eessi_load_hook(t)
+    eessi_cuda_enabled_load_hook(t)
+    eessi_openmpi_load_hook(t)
+end
+
+
+hook.register("load", eessi_load_hook)
+"""
+
+def error(msg):
+    sys.stderr.write("ERROR: %s\n" % msg)
+    sys.exit(1)
+
+
+if len(sys.argv) != 2:
+    error("Usage: %s <prefix>" % sys.argv[0])
+
+prefix = sys.argv[1]
+
+if not os.path.exists(prefix):
+    error("Prefix directory %s does not exist!" % prefix)
+
+sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua')
+try:
+    os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True)
+    with open(sitepackage_path, 'w') as fp:
+        fp.write(hook_txt)
+
+except (IOError, OSError) as err:
+    error("Failed to create %s: %s" % (sitepackage_path, err))
+
+print(sitepackage_path)
diff --git a/create_tarball.sh b/create_tarball.sh
index a619df9439..2d77acfc43 100755
--- a/create_tarball.sh
+++ b/create_tarball.sh
@@ -51,6 +51,11 @@ if [ -d ${eessi_version}/scripts ]; then
     find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list}
 fi
 
+# also include init, which is also copied by install_scripts.sh
+if [ -d ${eessi_version}/init ]; then
+    find ${eessi_version}/init -type f | grep -v '/\.wh\.' >> ${files_list}
+fi
+
 if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then
     # module files
     find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list}
diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables
index e042e8575a..a3ff265e77 100644
--- a/init/eessi_environment_variables
+++ b/init/eessi_environment_variables
@@ -85,6 +85,22 @@ if [ -d $EESSI_PREFIX ]; then
       false
     fi
 
+    export LMOD_CONFIG_DIR="$EESSI_SOFTWARE_PATH/.lmod"
+    lmod_rc_file="$LMOD_CONFIG_DIR/lmodrc.lua"
+    if [ -f "$lmod_rc_file" ]; then
+      show_msg "Found Lmod configuration file at $lmod_rc_file"
+    else
+      error "Lmod configuration file not found at $lmod_rc_file"
+    fi
+
+    export LMOD_PACKAGE_PATH="$EESSI_SOFTWARE_PATH/.lmod"
+    lmod_sitepackage_file="$LMOD_PACKAGE_PATH/SitePackage.lua"
+    if [ -f "$lmod_sitepackage_file" ]; then
+      show_msg "Found Lmod SitePackage.lua file at $lmod_sitepackage_file"
+    else
+      error "Lmod SitePackage.lua file not found at $lmod_sitepackage_file"
+    fi
+
   else
     error "EESSI software layer at $EESSI_SOFTWARE_PATH not found!"
   fi
diff --git a/install_scripts.sh b/install_scripts.sh
index 6e6cd825ac..508735975c 100755
--- a/install_scripts.sh
+++ b/install_scripts.sh
@@ -25,6 +25,35 @@ compare_and_copy() {
     fi
 }
 
+copy_files_by_list() {
+# Compares and copies listed files from a source to a target directory (one compare_and_copy call per file)
+    if [ "$#" -lt 3 ]; then
+        echo "Usage of function: copy_files_by_list <source_dir> <target_dir> <file_list>"
+        echo "Here, file_list is an (expanded) bash array"
+        echo "Example:"
+        echo "my_files=(file1 file2)"
+        echo 'copy_files_by_list /my/source /my/target "${my_files[@]}"'
+        return 1
+    fi
+    local source_dir="$1"
+    local target_dir="$2"
+    # Drop the two directory arguments; whatever remains is the (expanded) list of file names
+    shift 2
+    local file_list=("$@")
+    local file
+
+    # Create target dir
+    mkdir -p "${target_dir}"
+
+    # Copy from source to target
+    echo "Copying files: ${file_list[*]}"
+    echo "From directory: ${source_dir}"
+    echo "To directory: ${target_dir}"
+
+    for file in "${file_list[@]}"; do
+        compare_and_copy "${source_dir}/${file}" "${target_dir}/${file}"
+    done
+}
 
 POSITIONAL_ARGS=()
 
@@ -54,28 +83,33 @@ set -- "${POSITIONAL_ARGS[@]}"
 
 TOPDIR=$(dirname $(realpath $0))
 
-# Subdirs for generic scripts
-SCRIPTS_DIR_SOURCE=${TOPDIR}/scripts # Source dir
-SCRIPTS_DIR_TARGET=${INSTALL_PREFIX}/scripts # Target dir
+# Copy for init directory
+init_files=(
+    bash eessi_archdetect.sh eessi_defaults eessi_environment_variables eessi_software_subdir_for_host.py
+    minimal_eessi_env README.md test.py
+)
+copy_files_by_list "${TOPDIR}/init" "${INSTALL_PREFIX}/init" "${init_files[@]}"
 
-# Create target dir
-mkdir -p ${SCRIPTS_DIR_TARGET}
+# Copy for the init/arch_specs directory
+arch_specs_files=(
+    eessi_arch_arm.spec eessi_arch_ppc.spec eessi_arch_x86.spec
+)
+copy_files_by_list "${TOPDIR}/init/arch_specs" "${INSTALL_PREFIX}/init/arch_specs" "${arch_specs_files[@]}"
 
-# Copy scripts into this prefix
-echo "copying scripts from ${SCRIPTS_DIR_SOURCE} to ${SCRIPTS_DIR_TARGET}"
-for file in utils.sh; do
-    compare_and_copy ${SCRIPTS_DIR_SOURCE}/${file} ${SCRIPTS_DIR_TARGET}/${file}
-done
-# Subdirs for GPU support
-NVIDIA_GPU_SUPPORT_DIR_SOURCE=${SCRIPTS_DIR_SOURCE}/gpu_support/nvidia # Source dir
-NVIDIA_GPU_SUPPORT_DIR_TARGET=${SCRIPTS_DIR_TARGET}/gpu_support/nvidia # Target dir
-
-# Create target dir
-mkdir -p ${NVIDIA_GPU_SUPPORT_DIR_TARGET}
-
-# Copy files from this directory into the prefix
-# To be on the safe side, we dont do recursive copies, but we are explicitely copying each individual file we want to add
-echo "copying scripts from ${NVIDIA_GPU_SUPPORT_DIR_SOURCE} to ${NVIDIA_GPU_SUPPORT_DIR_TARGET}"
-for file in install_cuda_host_injections.sh link_nvidia_host_libraries.sh; do
-    compare_and_copy ${NVIDIA_GPU_SUPPORT_DIR_SOURCE}/${file} ${NVIDIA_GPU_SUPPORT_DIR_TARGET}/${file}
-done
+# Copy for init/Magic_Castle directory
+mc_files=(
+    bash eessi_python3
+)
+copy_files_by_list "${TOPDIR}/init/Magic_Castle" "${INSTALL_PREFIX}/init/Magic_Castle" "${mc_files[@]}"
+
+# Copy for the scripts directory
+script_files=(
+    utils.sh
+)
+copy_files_by_list "${TOPDIR}/scripts" "${INSTALL_PREFIX}/scripts" "${script_files[@]}"
+
+# Copy files for the scripts/gpu_support/nvidia directory
+nvidia_files=(
+    install_cuda_host_injections.sh link_nvidia_host_libraries.sh
+)
+copy_files_by_list "${TOPDIR}/scripts/gpu_support/nvidia" "${INSTALL_PREFIX}/scripts/gpu_support/nvidia" "${nvidia_files[@]}"