From 703ec7405720c1c5d2dead63ba74b8d0284e2646 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 04:48:11 +0000 Subject: [PATCH 1/8] Update ROCm versions for docker images --- .github/workflows/build-libtorch-images.yml | 2 +- .github/workflows/build-manywheel-images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index 7968bbb26..7c8e59f36 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -52,7 +52,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.6", "5.7"] + rocm_version: ["5.7", "6.0"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index d717416f6..46056ba14 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -58,7 +58,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.6", "5.7"] + rocm_version: ["5.7", "6.0"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} From 6a2b2cad396390e1156e702ca2b42df94e422505 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 05:25:54 +0000 Subject: [PATCH 2/8] Don't build MIOpen from source for ROCm6.0 --- common/install_miopen.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 779bc755d..0a2e98420 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -56,7 +56,10 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -if [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 50800 ]]; then +if [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then + echo "ROCm 6.0 MIOpen does not need any patches, do not build from source" + exit 0 +elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then echo "ROCm 5.7 MIOpen does not need any patches, do not build from source" exit 0 elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then From 1b2e168d06137141f078c816395989069645f781 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 05:27:39 +0000 Subject: [PATCH 3/8] Temporarily use magma fork with ROCm6.0 patch --- common/install_rocm_magma.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index c37c1e30a..0bf9b6ed6 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -10,12 +10,12 @@ set -ex MKLROOT=${MKLROOT:-/opt/intel} # "install" hipMAGMA into /opt/rocm/magma by copying after build -git clone https://bitbucket.org/icl/magma.git +git clone https://bitbucket.org/jithunnair-amd/magma.git -b gcnArch_deprecation pushd magma if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then git checkout magma_ctrl_launch_bounds else - git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f + git checkout a1d5ecd1a93bd031819bcb6500002c109ac29c74 fi cp make.inc-examples/make.inc.hip-gcc-mkl make.inc echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc From 94d243977055eab4aa61f641376f87f0aae77b70 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 06:12:29 +0000 Subject: [PATCH 4/8] Update ROCm versions for docker images --- libtorch/build_all_docker.sh | 2 +- manywheel/build_all_docker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index fb6bd975b..1a3a90d5a 100755 --- a/libtorch/build_all_docker.sh +++ b/libtorch/build_all_docker.sh @@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.6 5.7; do +for rocm_version in 5.7 6.0; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 2995e3be7..8a02361cb 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -16,7 +16,7 @@ for cuda_version in 12.1 11.8; do MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.6 5.7; do +for rocm_version in 5.7 6.0; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done From b4c1b97307bb952d5ce0a8bac67f0d178c180178 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 06:13:43 +0000 Subject: [PATCH 5/8] Add gfx942 --- libtorch/build_docker.sh | 2 +- manywheel/build_docker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index 8997f69cf..f2057bce0 100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -28,7 +28,7 @@ case ${GPU_ARCH_TYPE} in BASE_TARGET=rocm DOCKER_TAG=rocm${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index e547b4275..cf07c0dd6 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -49,7 +49,7 @@ case ${GPU_ARCH_TYPE} in DOCKER_TAG=rocm${GPU_ARCH_VERSION} LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) From 380bbe717f235e3f14caf89bf4092ab42b97d457 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 19 Dec 2023 06:28:02 +0000 Subject: [PATCH 6/8] Update MIOpen repo --- common/install_miopen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 0a2e98420..09ab251b7 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -88,7 +88,7 @@ fi yum remove -y miopen-hip -git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} +git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen # remove .git to save disk space since CI runner was running out rm -rf .git From 63747c23ee68eb3497788f139bb1f0af9478af5e Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 20 Dec 2023 17:29:32 +0000 Subject: [PATCH 7/8] Magma PR 42 is merged, so use upstream repo master branch now --- common/install_rocm_magma.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index 0bf9b6ed6..c8e43f675 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -10,12 +10,12 @@ set -ex MKLROOT=${MKLROOT:-/opt/intel} # "install" hipMAGMA into /opt/rocm/magma by copying after build -git clone https://bitbucket.org/jithunnair-amd/magma.git -b gcnArch_deprecation +git clone https://bitbucket.org/icl/magma.git pushd magma if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then git checkout magma_ctrl_launch_bounds else - git checkout a1d5ecd1a93bd031819bcb6500002c109ac29c74 + git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6 fi cp make.inc-examples/make.inc.hip-gcc-mkl make.inc echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc From 918c21ae05cab55dcd956573ac11f318e85255a2 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Fri, 22 Dec 2023 14:19:01 +0000 Subject: [PATCH 8/8] gfx942 target only fully supported for ROCm6.0 and above --- libtorch/build_docker.sh | 5 ++++- manywheel/build_docker.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index f2057bce0..b7ebdd36e 100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -28,7 +28,7 @@ case ${GPU_ARCH_TYPE} in BASE_TARGET=rocm DOCKER_TAG=rocm${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) @@ -36,6 +36,9 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi + if [[ $ROCM_VERSION_INT -ge 60000 ]]; then + PYTORCH_ROCM_ARCH+=";gfx942" + fi DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" ;; *) diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index cf07c0dd6..63b8e0c3d 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -49,7 +49,7 @@ case ${GPU_ARCH_TYPE} in DOCKER_TAG=rocm${GPU_ARCH_VERSION} LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-rocm:${GPU_ARCH_VERSION} GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete - PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100" + PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) @@ -57,6 +57,9 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi + if [[ $ROCM_VERSION_INT -ge 60000 ]]; then + PYTORCH_ROCM_ARCH+=";gfx942" + fi DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9" ;; *)