From 8b5b606f19286672fc9620406bc4b7d6fba08c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Wed, 11 Dec 2024 12:06:01 +0100 Subject: [PATCH 01/20] start host mpi injection --- .../install_openmpi_host_injection.sh | 209 ++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 scripts/mpi_support/install_openmpi_host_injection.sh diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh new file mode 100644 index 0000000000..79cc428099 --- /dev/null +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -0,0 +1,209 @@ +#!/usr/bin/env bash + +# This script can be used to install the host MPI libraries under the `.../host_injections` directory. +# It allows EESSI software to use the MPI stack from the host. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../../utils.sh + + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation" + echo " --pmix-path /path/to/mpi Specify the path to the PMIX host installation" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the mpi injection" + echo " (must have >10GB available)" +} + + +# Global associative array with os-release info +declare -A OS_RELEASE + +get_os_release() { + local key + local value + + while read -r key value; do + OS_RELEASE[${key}]="${value}" + done < <(awk -F = 'gsub(/"/, "", $2); {print $1, $2}' /etc/os-release) +} + + +parse_cmdline() { + while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + --mpi-path) + if [ -n "$2" ]; then + MPI_PATH="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + --pmix-path) + if [ -n "$2" ]; then + PMIX_PATH="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac + done +} + + +# ****Warning: patchelf v0.18.0 (currently shipped with EESSI) does not work.**** +# We get v0.17.2 +download_patchelf() { + local patchelf_version="0.17.2" + local url + + url="https://github.com/NixOS/patchelf/releases/download/${patchelf_version}/" + url+="patchelf-${patchelf_version}-${EESSI_CPU_FAMILY}.tar.gz" + + curl ${url} ${CURL_OPTS} -o ${TEMP_DIR}/patchelf.tar.gz + tar -xf ${TEMP_DIR}/patchelf.tar.gz -C ${TEMP_DIR} + PATCHELF_BIN=${TEMP_DIR}/bin/patchelf +} + + +inject_mpi() { + local efa_path="${AMAZON_PATH}/efa" + local openmpi_path="${MPI_PATH}" + local pmix_path="${PMIX_PATH}" + + local eessi_ldd="${EESSI_EPREFIX}/usr/bin/ldd" + local system_ldd="/usr/bin/ldd" + + (( OPENMPI_VERSION == 5 )) && openmpi_path+=5 + + local host_injection_mpi_path + + host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} + host_injection_mpi_path+="/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR}" + host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" + + if [ -d ${host_injection_mpi_path} ]; then + echo "MPI was already injected" + return 0 + fi + + sudo mkdir -p ${host_injection_mpi_path} + + local temp_inject_path="${TEMP_DIR}/mpi_inject" + mkdir ${temp_inject_path} + + # Get all library files from efa and openmpi dirs + find ${efa_path} ${openmpi_path} ${pmix_path} -maxdepth 2 -type f -name "*.so*" -exec cp {} ${temp_inject_path} \; + + # Copy library links to host injection path + sudo find ${efa_path} ${openmpi_path} ${pmix_path} -maxdepth 2 -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + # Get system libefa.so and libibverbs.so + find /lib/ /lib64/ \( -name "libefa.so*" -or -name "libibverbs.so*" \) -type f -exec cp {} ${temp_inject_path} \; + sudo find /lib/ /lib64/ \( -name "libefa.so*" -or -name "libibverbs.so*" \) -type l -exec cp -P {} ${host_injection_mpi_path} \; + + + # Get MPI libs dependencies from system ldd + local libname libpath + local -A libs_arr + + while read -r libname libpath; do + [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + libs_arr[${libname}]=${libpath} + done < <(cat <(${system_ldd} ${temp_inject_path}/*) <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${system_ldd}) | awk '/=>/{print $1, $3}' | sort | uniq) + + # Get MPI related lib dependencies not resolved by EESSI ldd + local lib + + while read -r lib; do + local dep + + ${PATCHELF_BIN} --set-rpath "" ${lib} + + while read -r dep; do + if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} + fi + done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) + + # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so + if [[ ${lib} =~ libmpi\.so ]]; then + while read -r dep; do + ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} + done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) + fi + + done < <(find ${temp_inject_path} -type f) + + # Sanity check MPI injection + if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then + sudo cp ${temp_inject_path}/* -t ${host_injection_mpi_path} + echo_green "MPI injection was successful" + return 0 + else + fatal_error "MPI host injection failed. EESSI will use its own MPI libraries" + fi +} + + +main() { + process_cmdline "$@" + get_os_release + check_eessi_initialised + + # we need a directory we can use for temporary storage + if [[ -z "${TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${TEMP_DIR}"/temp + if ! mkdir -p "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + echo "OpenMPI version to inject: ${OPENMPI_VERSION}" + download_patchelf + inject_mpi + + rm -rf "${tmpdir}" + echo "EESSI setup completed with success" +} + +main "$@" From 84f30b1f41d9ba891367377ca9e71159a6072f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 17 Feb 2025 13:00:55 +0100 Subject: [PATCH 02/20] remove tabs --- .../install_openmpi_host_injection.sh | 166 +++++++++--------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 79cc428099..153f385b41 100644 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -20,7 +20,7 @@ show_help() { echo "Options:" echo " --help Display this help message" echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation" - echo " --pmix-path /path/to/mpi Specify the path to the PMIX host installation" + echo " --pmix-path /path/to/mpi Specify the path to the PMIX host installation" echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" echo " storage during the mpi injection" echo " (must have >10GB available)" @@ -35,54 +35,54 @@ get_os_release() { local value while read -r key value; do - OS_RELEASE[${key}]="${value}" + OS_RELEASE[${key}]="${value}" done < <(awk -F = 'gsub(/"/, "", $2); {print $1, $2}' /etc/os-release) } parse_cmdline() { - while [[ $# -gt 0 ]]; do - case "$1" in - --help) - show_help - exit 0 - ;; - --mpi-path) - if [ -n "$2" ]; then - MPI_PATH="$2" - shift 2 - else - echo "Error: Argument required for $1" - show_help - exit 1 - fi - ;; - --pmix-path) - if [ -n "$2" ]; then - PMIX_PATH="$2" - shift 2 - else - echo "Error: Argument required for $1" - show_help - exit 1 - fi - ;; - -t|--temp-dir) - if [ -n "$2" ]; then - TEMP_DIR="$2" - shift 2 - else - echo "Error: Argument required for $1" - show_help - exit 1 - fi - ;; - *) - show_help - fatal_error "Error: Unknown option: $1" - ;; - esac - done + while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + --mpi-path) + if [ -n "$2" ]; then + MPI_PATH="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + --pmix-path) + if [ -n "$2" ]; then + PMIX_PATH="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + TEMP_DIR="$2" + shift 2 + else + echo "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + *) + show_help + fatal_error "Error: Unknown option: $1" + ;; + esac + done } @@ -113,13 +113,13 @@ inject_mpi() { local host_injection_mpi_path - host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} + host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} host_injection_mpi_path+="/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR}" host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" if [ -d ${host_injection_mpi_path} ]; then - echo "MPI was already injected" - return 0 + echo "MPI was already injected" + return 0 fi sudo mkdir -p ${host_injection_mpi_path} @@ -143,42 +143,42 @@ inject_mpi() { local -A libs_arr while read -r libname libpath; do - [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - libs_arr[${libname}]=${libpath} + [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + libs_arr[${libname}]=${libpath} done < <(cat <(${system_ldd} ${temp_inject_path}/*) <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${system_ldd}) | awk '/=>/{print $1, $3}' | sort | uniq) # Get MPI related lib dependencies not resolved by EESSI ldd local lib while read -r lib; do - local dep + local dep - ${PATCHELF_BIN} --set-rpath "" ${lib} + ${PATCHELF_BIN} --set-rpath "" ${lib} - while read -r dep; do - if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then - ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} - fi - done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) + while read -r dep; do + if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} + fi + done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) - # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so - if [[ ${lib} =~ libmpi\.so ]]; then - while read -r dep; do - ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} - done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) - fi + # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so + if [[ ${lib} =~ libmpi\.so ]]; then + while read -r dep; do + ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} + done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) + fi done < <(find ${temp_inject_path} -type f) # Sanity check MPI injection if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then - sudo cp ${temp_inject_path}/* -t ${host_injection_mpi_path} - echo_green "MPI injection was successful" - return 0 + sudo cp ${temp_inject_path}/* -t ${host_injection_mpi_path} + echo_green "MPI injection was successful" + return 0 else - fatal_error "MPI host injection failed. EESSI will use its own MPI libraries" + fatal_error "MPI host injection failed. EESSI will use its own MPI libraries" fi } @@ -186,21 +186,21 @@ inject_mpi() { main() { process_cmdline "$@" get_os_release - check_eessi_initialised - - # we need a directory we can use for temporary storage - if [[ -z "${TEMP_DIR}" ]]; then - tmpdir=$(mktemp -d) - else - tmpdir="${TEMP_DIR}"/temp - if ! mkdir -p "$tmpdir" ; then - fatal_error "Could not create directory ${tmpdir}" - fi - fi - - echo "OpenMPI version to inject: ${OPENMPI_VERSION}" - download_patchelf - inject_mpi + check_eessi_initialised + + # we need a directory we can use for temporary storage + if [[ -z "${TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${TEMP_DIR}"/temp + if ! mkdir -p "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + echo "OpenMPI version to inject: ${OPENMPI_VERSION}" + download_patchelf + inject_mpi rm -rf "${tmpdir}" echo "EESSI setup completed with success" From 3851e0167cae250461a6a2a3dec3dc7a91355799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 17 Feb 2025 13:23:24 +0100 Subject: [PATCH 03/20] fix indentation --- scripts/mpi_support/install_openmpi_host_injection.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 153f385b41..cddc53cb51 100644 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -143,10 +143,10 @@ inject_mpi() { local -A libs_arr while read -r libname libpath; do - [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - libs_arr[${libname}]=${libpath} + [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) + libs_arr[${libname}]=${libpath} done < <(cat <(${system_ldd} ${temp_inject_path}/*) <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${system_ldd}) | awk '/=>/{print $1, $3}' | sort | uniq) # Get MPI related lib dependencies not resolved by EESSI ldd From 95f917dfa58d4ef392dab576b1540b50b08aeaed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 17 Feb 2025 16:34:49 +0100 Subject: [PATCH 04/20] fix indentation --- .../install_openmpi_host_injection.sh | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index cddc53cb51..ab45e8d0a6 100644 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -153,22 +153,22 @@ inject_mpi() { local lib while read -r lib; do - local dep + local dep - ${PATCHELF_BIN} --set-rpath "" ${lib} + ${PATCHELF_BIN} --set-rpath "" ${lib} - while read -r dep; do - if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then - ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} - fi - done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) - - # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so - if [[ ${lib} =~ libmpi\.so ]]; then while read -r dep; do - ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} - done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) - fi + if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} + fi + done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) + + # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so + if [[ ${lib} =~ libmpi\.so ]]; then + while read -r dep; do + ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} + done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) + fi done < <(find ${temp_inject_path} -type f) From 4251a12993f5212f733318f306c11f2138889c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Thu, 27 Feb 2025 19:42:12 +0100 Subject: [PATCH 05/20] generalize script --- .../install_openmpi_host_injection.sh | 164 ++++++++++-------- 1 file changed, 89 insertions(+), 75 deletions(-) mode change 100644 => 100755 scripts/mpi_support/install_openmpi_host_injection.sh diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh old mode 100644 new mode 100755 index ab45e8d0a6..7c38d05339 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -11,7 +11,7 @@ # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) -source "$TOPDIR"/../../utils.sh +source "$TOPDIR"/../utils.sh # Function to display help message @@ -19,24 +19,10 @@ show_help() { echo "Usage: $0 [OPTIONS]" echo "Options:" echo " --help Display this help message" - echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation" - echo " --pmix-path /path/to/mpi Specify the path to the PMIX host installation" + echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation (Required)" echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" echo " storage during the mpi injection" - echo " (must have >10GB available)" -} - - -# Global associative array with os-release info -declare -A OS_RELEASE - -get_os_release() { - local key - local value - - while read -r key value; do - OS_RELEASE[${key}]="${value}" - done < <(awk -F = 'gsub(/"/, "", $2); {print $1, $2}' /etc/os-release) + echo " --noclean Do not remove the temporary directory after finishing injection" } @@ -49,68 +35,73 @@ parse_cmdline() { ;; --mpi-path) if [ -n "$2" ]; then - MPI_PATH="$2" + readonly MPI_PATH="$2" shift 2 else - echo "Error: Argument required for $1" - show_help - exit 1 - fi - ;; - --pmix-path) - if [ -n "$2" ]; then - PMIX_PATH="$2" - shift 2 - else - echo "Error: Argument required for $1" + echo_red "Error: Argument required for $1" show_help exit 1 fi ;; -t|--temp-dir) if [ -n "$2" ]; then - TEMP_DIR="$2" + readonly TEMP_DIR="$2" shift 2 else - echo "Error: Argument required for $1" + echo_red "Error: Argument required for $1" show_help exit 1 fi ;; + --noclean) + CLEAN=false + shift 1 + ;; *) + echo_red "Error: Unknown option: $1" show_help - fatal_error "Error: Unknown option: $1" + exit 1 ;; esac done + if [ -z "${MPI_PATH}" ]; then + echo_yellow "MPI path was not specified and it is required" + show_help + exit 0 + fi + readonly CLEAN=${CLEAN:=true} } # ****Warning: patchelf v0.18.0 (currently shipped with EESSI) does not work.**** # We get v0.17.2 download_patchelf() { + # Temporary directory to save patchelf + local tmpdir=$1 + local patchelf_version="0.17.2" local url + local curl_opts="-L --silent --show-error --fail" url="https://github.com/NixOS/patchelf/releases/download/${patchelf_version}/" url+="patchelf-${patchelf_version}-${EESSI_CPU_FAMILY}.tar.gz" - curl ${url} ${CURL_OPTS} -o ${TEMP_DIR}/patchelf.tar.gz - tar -xf ${TEMP_DIR}/patchelf.tar.gz -C ${TEMP_DIR} - PATCHELF_BIN=${TEMP_DIR}/bin/patchelf + local patchelf_path=${tmpdir}/patchelf + mkdir ${patchelf_path} + + curl ${url} ${curl_opts} -o ${patchelf_path}/patchelf.tar.gz + tar -xf ${patchelf_path}/patchelf.tar.gz -C ${patchelf_path} + PATCHELF_BIN=${patchelf_path}/bin/patchelf } inject_mpi() { - local efa_path="${AMAZON_PATH}/efa" - local openmpi_path="${MPI_PATH}" - local pmix_path="${PMIX_PATH}" + # Temporary directory for injection + local tmpdir=$1 local eessi_ldd="${EESSI_EPREFIX}/usr/bin/ldd" local system_ldd="/usr/bin/ldd" - (( OPENMPI_VERSION == 5 )) && openmpi_path+=5 - local host_injection_mpi_path host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} @@ -118,63 +109,81 @@ inject_mpi() { host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" if [ -d ${host_injection_mpi_path} ]; then - echo "MPI was already injected" - return 0 + if [ -n "$(ls -A ${host_injection_mpi_path})" ]; then + echo "MPI was already injected" + return 0 + fi fi - sudo mkdir -p ${host_injection_mpi_path} + mkdir -p ${host_injection_mpi_path} - local temp_inject_path="${TEMP_DIR}/mpi_inject" + local temp_inject_path="${tmpdir}/mpi_inject" mkdir ${temp_inject_path} - # Get all library files from efa and openmpi dirs - find ${efa_path} ${openmpi_path} ${pmix_path} -maxdepth 2 -type f -name "*.so*" -exec cp {} ${temp_inject_path} \; + # Get all library files from openmpi dir + find ${MPI_PATH} -maxdepth 1 -type f -name "*.so*" -exec cp {} ${temp_inject_path} \; # Copy library links to host injection path - sudo find ${efa_path} ${openmpi_path} ${pmix_path} -maxdepth 2 -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; - - # Get system libefa.so and libibverbs.so - find /lib/ /lib64/ \( -name "libefa.so*" -or -name "libibverbs.so*" \) -type f -exec cp {} ${temp_inject_path} \; - sudo find /lib/ /lib64/ \( -name "libefa.so*" -or -name "libibverbs.so*" \) -type l -exec cp -P {} ${host_injection_mpi_path} \; - + find ${MPI_PATH} -maxdepth 1 -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; # Get MPI libs dependencies from system ldd local libname libpath - local -A libs_arr + local -A libs_dict + local -a dlopen_libs + + readarray -d '' dlopen_libs < <(find ${MPI_PATH} -mindepth 2 -name "*.so*") + # Get all library names and paths in associative array while read -r libname libpath; do - [[ ${libpath} =~ ${AMAZON_PATH}/.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libefa\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - [[ ${libname} =~ libibverbs\.so\.?.* ]] && libpath=${host_injection_mpi_path}/$(basename ${libpath}) - libs_arr[${libname}]=${libpath} - done < <(cat <(${system_ldd} ${temp_inject_path}/*) <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${system_ldd}) | awk '/=>/{print $1, $3}' | sort | uniq) + # If library is libfabric or from the MPI path, modify libpath in assoc array to point to host_injection_mpi_path + if [[ ${libname} =~ libfabric\.so\.?.* ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work + find ${libdir} -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} ${temp_inject_path} \; + find ${libdir} -maxdepth 1 -type l -name "libfabric.so*" -exec cp -P {} ${host_injection_mpi_path} \; + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + + if [[ ${libpath} =~ ${MPI_PATH}/.* ]]; then + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + + libs_dict[${libname}]=${libpath} + done < <(cat <(find ${temp_inject_path} -maxdepth 1 -type f -name "*.so*" -exec ${system_ldd} {} \;) \ + <(for dlopen in ${dlopen_libs[@]}; do ${system_ldd} ${dlopen}; done) \ + | awk '/=>/ {print $1, $3}' | sort | uniq) # Get MPI related lib dependencies not resolved by EESSI ldd local lib - while read -r lib; do local dep - ${PATCHELF_BIN} --set-rpath "" ${lib} - + ${PATCHELF_BIN} --set-rpath "${host_injection_mpi_path}" ${lib} while read -r dep; do - if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then - ${PATCHELF_BIN} --replace-needed ${dep} ${libs_arr[${dep}]} ${lib} - fi - done < <(${eessi_ldd} ${lib} | awk '/not found/ || /libefa/ || /libibverbs/ {print $1}' | sort | uniq) + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + done < <(${eessi_ldd} ${lib} | awk '/not found/ {print $1}' | sort | uniq) # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so if [[ ${lib} =~ libmpi\.so ]]; then while read -r dep; do - ${PATCHELF_BIN} --add-needed ${libs_arr[${dep}]} ${lib} - done < <(comm -23 <(find ${openmpi_path} -mindepth 3 -name "*.so*" -print0 | xargs -0 ${eessi_ldd} | awk '/not found/ {print $1}' | sort | uniq) <(${PATCHELF_BIN} --print-needed ${lib} | sort)) + if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} + fi + done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ + | grep -e "=> not found" -e "=> ${MPI_PATH}" | awk '{print $1}' | sort | uniq) + fi + + # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) + if [[ ${lib} =~ libfabric\.so\.?.* ]]; then + while read -r dep; do + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + done < <(${system_ldd} ${lib} | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq) fi done < <(find ${temp_inject_path} -type f) # Sanity check MPI injection if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then - sudo cp ${temp_inject_path}/* -t ${host_injection_mpi_path} + cp ${temp_inject_path}/* -t ${host_injection_mpi_path} echo_green "MPI injection was successful" return 0 else @@ -184,10 +193,13 @@ inject_mpi() { main() { - process_cmdline "$@" - get_os_release + parse_cmdline "$@" check_eessi_initialised + # Create directory linked by host_injections + local inject_dir=$(readlink -f /cvmfs/software.eessi.io/host_injections) + [[ ! -d ${inject_dir} ]] && mkdir -p ${inject_dir} + # we need a directory we can use for temporary storage if [[ -z "${TEMP_DIR}" ]]; then tmpdir=$(mktemp -d) @@ -198,12 +210,14 @@ main() { fi fi - echo "OpenMPI version to inject: ${OPENMPI_VERSION}" - download_patchelf - inject_mpi + echo "Temporary directory for injection: ${tmpdir}" - rm -rf "${tmpdir}" - echo "EESSI setup completed with success" + download_patchelf ${tmpdir} + inject_mpi ${tmpdir} + + if ${CLEAN}; then + rm -rf "${tmpdir}" + fi } main "$@" From 4c89f2cd5e3823902ae7d2fa2570ca8dac0d55a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Fri, 28 Feb 2025 17:55:58 +0100 Subject: [PATCH 06/20] add to libmpi only not found libs in dlopen libraries --- scripts/mpi_support/install_openmpi_host_injection.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 7c38d05339..035d8d739a 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -168,8 +168,7 @@ inject_mpi() { if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} fi - done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ - | grep -e "=> not found" -e "=> ${MPI_PATH}" | awk '{print $1}' | sort | uniq) + done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done | awk '/not found/ {print $1}' | sort | uniq) fi # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) From e9efd6e6c7747624acfc11de2750f13e3b18db92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 3 Mar 2025 12:47:04 +0100 Subject: [PATCH 07/20] fix mpi injection path --- scripts/mpi_support/install_openmpi_host_injection.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 035d8d739a..96158a409a 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -105,7 +105,6 @@ inject_mpi() { local host_injection_mpi_path host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} - host_injection_mpi_path+="/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR}" host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" if [ -d ${host_injection_mpi_path} ]; then From 9c067d8d1aa4d3db5e87126804d8141c1d1967bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 3 Mar 2025 16:20:31 +0100 Subject: [PATCH 08/20] inject the libpmix library --- scripts/mpi_support/install_openmpi_host_injection.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 96158a409a..45b1bef8eb 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -142,11 +142,19 @@ inject_mpi() { libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi + if [[ ${libname} =~ libpmix\.so\.?.* ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work + find ${libdir} -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} ${temp_inject_path} \; + find ${libdir} -maxdepth 1 -type l -name "libpmix.so*" -exec cp -P {} ${host_injection_mpi_path} \; + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + if [[ ${libpath} =~ ${MPI_PATH}/.* ]]; then libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi libs_dict[${libname}]=${libpath} + done < <(cat <(find ${temp_inject_path} -maxdepth 1 -type f -name "*.so*" -exec ${system_ldd} {} \;) \ <(for dlopen in ${dlopen_libs[@]}; do ${system_ldd} ${dlopen}; done) \ | awk '/=>/ {print $1, $3}' | sort | uniq) From 1dca91178d92b052c1b0c10847ecf5b905218300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 3 Mar 2025 18:40:51 +0100 Subject: [PATCH 09/20] revert to inject dependencies from dlopen libs in mpi path --- scripts/mpi_support/install_openmpi_host_injection.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 45b1bef8eb..560ffec5e0 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -149,7 +149,7 @@ inject_mpi() { libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi - if [[ ${libpath} =~ ${MPI_PATH}/.* ]]; then + if [[ ${libpath} =~ ${MPI_PATH} ]]; then libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi @@ -173,9 +173,11 @@ inject_mpi() { if [[ ${lib} =~ libmpi\.so ]]; then while read -r dep; do if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then - ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} fi - done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done | awk '/not found/ {print $1}' | sort | uniq) + done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ + | grep -e "=> not found" -e "=> ${MPI_PATH}" | awk '!/libmpi\.so.*/ {print $1}' | sort | uniq) + fi fi # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) From 0cf08c541ef241ff35940702079417e87f9bff8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 3 Mar 2025 18:41:15 +0100 Subject: [PATCH 10/20] add pmix injection --- .../install_openmpi_host_injection.sh | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 560ffec5e0..7b031b461e 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -43,6 +43,16 @@ parse_cmdline() { exit 1 fi ;; + --pmix-path) + if [ -n "$2"]; then + readonly PMIX_PATH="$2" + shift 2 + else + echo_red "Error: Argument required for $1" + show_help + exit 1 + fi + ;; -t|--temp-dir) if [ -n "$2" ]; then readonly TEMP_DIR="$2" @@ -69,6 +79,12 @@ parse_cmdline() { show_help exit 0 fi + + if [ -z "${PMIX_PATH}" ]; then + echo_yellow "PMIX path was not specified" + echo_yellow "Assuming it is the directory where libpmix is found" + fi + readonly CLEAN=${CLEAN:=true} } @@ -126,7 +142,7 @@ inject_mpi() { find ${MPI_PATH} -maxdepth 1 -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; # Get MPI libs dependencies from system ldd - local libname libpath + local libname libpath pmixpath local -A libs_dict local -a dlopen_libs @@ -144,8 +160,15 @@ inject_mpi() { if [[ ${libname} =~ libpmix\.so\.?.* ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work + [ -n "${PMIX_PATH}" ] && pmixpath="${PMIX_PATH}/pmix" || pmixpath="$(dirname ${libpath})/pmix" find ${libdir} -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} ${temp_inject_path} \; find ${libdir} -maxdepth 1 -type l -name "libpmix.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + local depname deppath + while read -r depname deppath; do + libs_dict[${depname}]=${deppath} + done < <(find ${pmixpath} -maxdepth 1 -name "*.so*" -exec ${system_ldd} {} \; | awk '/=>/ {print $1, $3}' | sort | uniq) + libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi @@ -178,6 +201,14 @@ inject_mpi() { done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ | grep -e "=> not found" -e "=> ${MPI_PATH}" | awk '!/libmpi\.so.*/ {print $1}' | sort | uniq) fi + + # Inject into libpmix.so non resolved dependencies from dlopen libraries in the PMIX path + if [[ ${lib} =~ libpmix\.so ]]; then + while read -r dep; do + if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} + fi + done < <(find ${pmixpath} -maxdepth 1 -type f -name "*.so*" -exec ${eessi_ldd} {} \; | awk '/not found/ && !/libpmix\.so.*/ {print $1}' | sort | uniq) fi # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) From 2d0373b6439cda5075690393bf88a76c97896a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Mon, 3 Mar 2025 19:12:57 +0100 Subject: [PATCH 11/20] add libfabric dependencies to the libs dictionary --- scripts/mpi_support/install_openmpi_host_injection.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 7b031b461e..ceace8c805 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -155,6 +155,12 @@ inject_mpi() { local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work find ${libdir} -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} ${temp_inject_path} \; find ${libdir} -maxdepth 1 -type l -name "libfabric.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + local depname deppath + while read -r depname deppath; do + libs_dict[${depname}]=${deppath} + done < <(${system_ldd} ${libpath} | awk '/=>/ {print $1, $3}' | sort | uniq) + libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi From bcf8d135fba102958d2498d012ce79278803f778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 10:49:11 +0100 Subject: [PATCH 12/20] simplify regex --- scripts/mpi_support/install_openmpi_host_injection.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index ceace8c805..a9ed73ed64 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -151,7 +151,7 @@ inject_mpi() { # Get all library names and paths in associative array while read -r libname libpath; do # If library is libfabric or from the MPI path, modify libpath in assoc array to point to host_injection_mpi_path - if [[ ${libname} =~ libfabric\.so\.?.* ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + if [[ ${libname} =~ libfabric\.so ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work find ${libdir} -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} ${temp_inject_path} \; find ${libdir} -maxdepth 1 -type l -name "libfabric.so*" -exec cp -P {} ${host_injection_mpi_path} \; @@ -164,7 +164,7 @@ inject_mpi() { libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi - if [[ ${libname} =~ libpmix\.so\.?.* ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + if [[ ${libname} =~ libpmix\.so ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work [ -n "${PMIX_PATH}" ] && pmixpath="${PMIX_PATH}/pmix" || pmixpath="$(dirname ${libpath})/pmix" find ${libdir} -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} ${temp_inject_path} \; @@ -218,7 +218,7 @@ inject_mpi() { fi # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) - if [[ ${lib} =~ libfabric\.so\.?.* ]]; then + if [[ ${lib} =~ libfabric\.so ]]; then while read -r dep; do ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} done < <(${system_ldd} ${lib} | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq) From 18b39fb13963c22559262e3e8ef62005244f42fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 12:34:33 +0100 Subject: [PATCH 13/20] update comments --- scripts/mpi_support/install_openmpi_host_injection.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index a9ed73ed64..01783fb20b 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -149,8 +149,9 @@ inject_mpi() { readarray -d '' dlopen_libs < <(find ${MPI_PATH} -mindepth 2 -name "*.so*") # Get all library names and paths in associative array + # If library is libfabric, libpmix, or from the MPI path + # modify libpath in assoc array to point to host_injection_mpi_path while read -r libname libpath; do - # If library is libfabric or from the MPI path, modify libpath in assoc array to point to host_injection_mpi_path if [[ ${libname} =~ libfabric\.so ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work find ${libdir} -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} ${temp_inject_path} \; @@ -188,7 +189,7 @@ inject_mpi() { <(for dlopen in ${dlopen_libs[@]}; do ${system_ldd} ${dlopen}; done) \ | awk '/=>/ {print $1, $3}' | sort | uniq) - # Get MPI related lib dependencies not resolved by EESSI ldd + # Do library injection to openmpi libs, libfabric and libpmix local lib while read -r lib; do local dep From b1983f4ea890947df92947b60cd5aa92e94b5f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 12:37:28 +0100 Subject: [PATCH 14/20] force certain dependencies into libfabric done first --- .../install_openmpi_host_injection.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 01783fb20b..95b6cdcc52 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -194,6 +194,15 @@ inject_mpi() { while read -r lib; do local dep + # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) + # Must be done before the injection of unresolved dependencies + if [[ ${lib} =~ libfabric\.so ]]; then + while read -r dep; do + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + done < <(${system_ldd} ${lib} | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq) + fi + + # Do injection of unresolved libraries ${PATCHELF_BIN} --set-rpath "${host_injection_mpi_path}" ${lib} while read -r dep; do ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} @@ -218,13 +227,6 @@ inject_mpi() { done < <(find ${pmixpath} -maxdepth 1 -type f -name "*.so*" -exec ${eessi_ldd} {} \; | awk '/not found/ && !/libpmix\.so.*/ {print $1}' | sort | uniq) fi - # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) - if [[ ${lib} =~ libfabric\.so ]]; then - while read -r dep; do - ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} - done < <(${system_ldd} ${lib} | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq) - fi - done < <(find ${temp_inject_path} -type f) # Sanity check MPI injection From 5deaab100111d2059e630b6404db4af46376ddb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 12:45:48 +0100 Subject: [PATCH 15/20] check if unresolved dependency is needed and choose add or replace --- scripts/mpi_support/install_openmpi_host_injection.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 95b6cdcc52..7a521ad1fa 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -205,7 +205,11 @@ inject_mpi() { # Do injection of unresolved libraries ${PATCHELF_BIN} --set-rpath "${host_injection_mpi_path}" ${lib} while read -r dep; do - ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + else + ${PATCHELF_BIN} --add-needed ${dep} ${libs_dict[${dep}]} ${lib} + fi done < <(${eessi_ldd} ${lib} | awk '/not found/ {print $1}' | sort | uniq) # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so From 3fdbd0b02a8c86f36a3ca6b8b8e602f9f4e13e4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 12:47:54 +0100 Subject: [PATCH 16/20] fix add new needed library: only two args needed --- scripts/mpi_support/install_openmpi_host_injection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 7a521ad1fa..6b48b0ac04 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -208,7 +208,7 @@ inject_mpi() { if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} else - ${PATCHELF_BIN} --add-needed ${dep} ${libs_dict[${dep}]} ${lib} + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} fi done < <(${eessi_ldd} ${lib} | awk '/not found/ {print $1}' | sort | uniq) From 883593564ced83a1286fee0b65d15974c2098481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 14:05:50 +0100 Subject: [PATCH 17/20] remove unneeded pmix code --- .../install_openmpi_host_injection.sh | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 6b48b0ac04..861a5f4abc 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -43,16 +43,6 @@ parse_cmdline() { exit 1 fi ;; - --pmix-path) - if [ -n "$2"]; then - readonly PMIX_PATH="$2" - shift 2 - else - echo_red "Error: Argument required for $1" - show_help - exit 1 - fi - ;; -t|--temp-dir) if [ -n "$2" ]; then readonly TEMP_DIR="$2" @@ -171,11 +161,6 @@ inject_mpi() { find ${libdir} -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} ${temp_inject_path} \; find ${libdir} -maxdepth 1 -type l -name "libpmix.so*" -exec cp -P {} ${host_injection_mpi_path} \; - local depname deppath - while read -r depname deppath; do - libs_dict[${depname}]=${deppath} - done < <(find ${pmixpath} -maxdepth 1 -name "*.so*" -exec ${system_ldd} {} \; | awk '/=>/ {print $1, $3}' | sort | uniq) - libpath=${host_injection_mpi_path}/$(basename ${libpath}) fi @@ -219,16 +204,7 @@ inject_mpi() { ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} fi done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ - | grep -e "=> not found" -e "=> ${MPI_PATH}" | awk '!/libmpi\.so.*/ {print $1}' | sort | uniq) - fi - - # Inject into libpmix.so non resolved dependencies from dlopen libraries in the PMIX path - if [[ ${lib} =~ libpmix\.so ]]; then - while read -r dep; do - if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then - ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} - fi - done < <(find ${pmixpath} -maxdepth 1 -type f -name "*.so*" -exec ${eessi_ldd} {} \; | awk '/not found/ && !/libpmix\.so.*/ {print $1}' | sort | uniq) + | awk '/not found/ && !/libmpi\.so.*/ {print $1}' | sort | uniq) fi done < <(find ${temp_inject_path} -type f) From f60394d0f7f1a5fb70fb991419033993c86758c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 14:16:34 +0100 Subject: [PATCH 18/20] remove pmix path from parse_cmdline --- scripts/mpi_support/install_openmpi_host_injection.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 861a5f4abc..aab42f684c 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -70,11 +70,6 @@ parse_cmdline() { exit 0 fi - if [ -z "${PMIX_PATH}" ]; then - echo_yellow "PMIX path was not specified" - echo_yellow "Assuming it is the directory where libpmix is found" - fi - readonly CLEAN=${CLEAN:=true} } From 6b3c1fd139ca74099d099dd30bff639b4c8795b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 15:04:05 +0100 Subject: [PATCH 19/20] improve sanity check --- .../install_openmpi_host_injection.sh | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index aab42f684c..1f832f98a4 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -22,7 +22,7 @@ show_help() { echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation (Required)" echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" echo " storage during the mpi injection" - echo " --noclean Do not remove the temporary directory after finishing injection" + echo " --noclean Do not remove the temporary directory and the host injected libraries after finishing injection" } @@ -205,13 +205,17 @@ inject_mpi() { done < <(find ${temp_inject_path} -type f) # Sanity check MPI injection + local sanity=1 if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then cp ${temp_inject_path}/* -t ${host_injection_mpi_path} - echo_green "MPI injection was successful" - return 0 - else - fatal_error "MPI host injection failed. EESSI will use its own MPI libraries" + if ${eessi_ldd} ${temp_inject_path}/* | grep -q "not found"; then + ${CLEAN} && rm -f ${host_injection_mpi_path}/*.so* + else + sanity=0 + fi fi + + return ${sanity} } @@ -236,7 +240,12 @@ main() { echo "Temporary directory for injection: ${tmpdir}" download_patchelf ${tmpdir} - inject_mpi ${tmpdir} + + if inject_mpi ${tmpdir}; then + echo_green "MPI injection was successful" + else + fatal_error "MPI host injection failed" + fi if ${CLEAN}; then rm -rf "${tmpdir}" From 4dac5da6767edeca2f1cb1cd90774597d5342f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Fern=C3=A1ndez?= Date: Tue, 4 Mar 2025 16:32:51 +0100 Subject: [PATCH 20/20] add force cmd option to do MPI injection even if already done --- .../mpi_support/install_openmpi_host_injection.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 1f832f98a4..3a9872cea7 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -23,6 +23,7 @@ show_help() { echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" echo " storage during the mpi injection" echo " --noclean Do not remove the temporary directory and the host injected libraries after finishing injection" + echo " --force Force MPI injection even if it is already done" } @@ -57,6 +58,10 @@ parse_cmdline() { CLEAN=false shift 1 ;; + --force) + FORCE=true + shift 1 + ;; *) echo_red "Error: Unknown option: $1" show_help @@ -71,6 +76,7 @@ parse_cmdline() { fi readonly CLEAN=${CLEAN:=true} + readonly FORCE=${FORCE:=false} } @@ -111,7 +117,11 @@ inject_mpi() { if [ -d ${host_injection_mpi_path} ]; then if [ -n "$(ls -A ${host_injection_mpi_path})" ]; then echo "MPI was already injected" - return 0 + if ${FORCE}; then + echo "Forcing new MPI injection" + else + return 0 + fi fi fi