From 841de2cc899696e537ec827ef1fc7797852fad44 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 8 Feb 2023 09:38:52 +0100 Subject: [PATCH 01/19] consolidate Linux workflows on CPU and GPU --- .github/unittest.sh | 84 ++++++++++++++++++++++++++++ .github/workflows/test-linux-cpu.yml | 57 ------------------- .github/workflows/test-linux-gpu.yml | 61 -------------------- .github/workflows/test-linux.yml | 38 +++++++++++++ 4 files changed, 122 insertions(+), 118 deletions(-) create mode 100755 .github/unittest.sh delete mode 100644 .github/workflows/test-linux-cpu.yml delete mode 100644 .github/workflows/test-linux-gpu.yml create mode 100644 .github/workflows/test-linux.yml diff --git a/.github/unittest.sh b/.github/unittest.sh new file mode 100755 index 00000000000..98889286685 --- /dev/null +++ b/.github/unittest.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +set -euo pipefail + +echo '::group::Prepare conda' +CONDA_PATH=$(which conda) +eval "$(${CONDA_PATH} shell.bash hook)" +# The `setuptools` package installed through `conda` includes a patch that errors if something is installed +# through `setuptools` while the `CONDA_BUILD` environment variable is set. +# https://github.com/AnacondaRecipes/setuptools-feedstock/blob/f5d8d256810ce28fc0cf34170bc34e06d3754041/recipe/patches/0002-disable-downloads-inside-conda-build.patch +# (Although we are not using the `-c conda-forge` channel, the patch is equivalent but not public for +# `setuptools` from the `-c defaults` channel) +# Since we aren't using `conda build` here, we unset it to avoid installation problems later +# TODO: investigate where `CONDA_BUILD` is set and maybe fix unset it there +unset CONDA_BUILD +echo '::endgroup::' + +echo '::group::Set PyTorch conda channel' +# TODO: Can we maybe have this as environment variable in the job template? For example, `IS_RELEASE`. +if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then + POSTFIX=test +else + POSTFIX=nightly +fi +PYTORCH_CHANNEL=pytorch-"${POSTFIX}" +echo "${PYTORCH_CHANNEL}" +echo '::endgroup::' + +echo '::group::Set PyTorch GPU mutex' +case $GPU_ARCH_TYPE in + cpu) + PYTORCH_MUTEX=cpuonly + ;; + cuda) + PYTORCH_MUTEX="pytorch-cuda=${GPU_ARCH_VERSION}" + ;; + *) + echo "Unknown GPU_ARCH_TYPE=${GPU_ARCH_TYPE}" + exit 1 + ;; +esac +echo "${PYTORCH_MUTEX}" +echo '::endgroup::' + +echo '::group::Create build environment' +conda create \ + --name ci \ + --quiet --yes \ + python="${PYTHON_VERSION}" pip \ + setuptools ninja \ + libpng jpeg \ + Pillow numpy requests +conda activate ci +pip install 'av<10' +echo '::endgroup::' + +echo '::group::Install PyTorch' +conda install \ + --quiet --yes \ + -c "${PYTORCH_CHANNEL}" \ + -c nvidia \ + pytorch \ + "${PYTORCH_MUTEX}" + +if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then + python3 -c "import torch; exit(not torch.cuda.is_available())" +fi +echo '::endgroup::' + +echo '::group::Install TorchVision' +python setup.py develop +echo '::endgroup::' + +echo '::group::Collect PyTorch environment information' +python -m torch.utils.collect_env +echo '::endgroup::' + +echo '::group::Install testing utilities' +pip install --progress-bar=off pytest pytest-mock pytest-cov +echo '::endgroup::' + +echo '::group::Run tests' +pytest --durations=25 +echo '::endgroup::' diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml deleted file mode 100644 index 5dc7550d868..00000000000 --- a/.github/workflows/test-linux-cpu.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: Unit-tests on Linux CPU - -on: - pull_request: - push: - branches: - - nightly - - main - - release/* - workflow_dispatch: - -env: - CHANNEL: "nightly" - -jobs: - tests: - strategy: - matrix: - python_version: ["3.8", "3.9", "3.10"] - fail-fast: false - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.12xlarge - repository: pytorch/vision - script: | - # Mark Build Directory Safe - git config --global --add safe.directory /__w/vision/vision - - # Set up Environment Variables - export PYTHON_VERSION="${{ matrix.python_version }}" - export VERSION="cpu" - export CUDATOOLKIT="cpuonly" - - # Set CHANNEL - if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then - export CHANNEL=test - else - export CHANNEL=nightly - fi - - # Create Conda Env - conda create -yp ci_env python="${PYTHON_VERSION}" numpy libpng jpeg scipy - conda activate /work/ci_env - - # Install PyTorch, Torchvision, and testing libraries - set -ex - conda install \ - --yes \ - -c "pytorch-${CHANNEL}" \ - -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ - "${CUDATOOLKIT}" - python3 setup.py develop - python3 -m pip install pytest pytest-mock 'av<10' - - # Run Tests - python3 -m torch.utils.collect_env - python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml deleted file mode 100644 index 831de27e350..00000000000 --- a/.github/workflows/test-linux-gpu.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Unit-tests on Linux GPU - -on: - pull_request: - push: - branches: - - nightly - - main - - release/* - workflow_dispatch: - -env: - CHANNEL: "nightly" - -jobs: - tests: - strategy: - matrix: - python_version: ["3.8"] - cuda_arch_version: ["11.7"] - fail-fast: false - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.g5.4xlarge.nvidia.gpu - repository: pytorch/vision - gpu-arch-type: cuda - gpu-arch-version: ${{ matrix.cuda_arch_version }} - timeout: 120 - script: | - # Mark Build Directory Safe - git config --global --add safe.directory /__w/vision/vision - - # Set up Environment Variables - export PYTHON_VERSION="${{ matrix.python_version }}" - export VERSION="${{ matrix.cuda_arch_version }}" - export CUDATOOLKIT="pytorch-cuda=${VERSION}" - - # Set CHANNEL - if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then - export CHANNEL=test - else - export CHANNEL=nightly - fi - - # Create Conda Env - conda create -yp ci_env python="${PYTHON_VERSION}" numpy libpng jpeg scipy - conda activate /work/ci_env - - # Install PyTorch, Torchvision, and testing libraries - set -ex - conda install \ - --yes \ - -c "pytorch-${CHANNEL}" \ - -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ - "${CUDATOOLKIT}" - python3 setup.py develop - python3 -m pip install pytest pytest-mock 'av<10' - - # Run Tests - python3 -m torch.utils.collect_env - python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml new file mode 100644 index 00000000000..f34ad299483 --- /dev/null +++ b/.github/workflows/test-linux.yml @@ -0,0 +1,38 @@ +name: Unit-tests on Linux + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +jobs: + tests: + strategy: + matrix: + python-version: + - "3.8" + - "3.9" + - "3.10" + runner: ["linux.12xlarge"] + gpu-arch-type: ["cpu"] + include: + - python-version: 3.8 + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.7" + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + repository: pytorch/vision + runner: ${{ matrix.runner }} + timeout: 120 + script: | + export PYTHON_VERSION=${{ matrix.python-version }} + export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} + export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} + + ./.github/unittest.sh From 7e9083822061529cf04be34916368eab7d61db57 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 8 Feb 2023 14:42:38 +0100 Subject: [PATCH 02/19] add sleep for ssh --- .github/unittest.sh | 30 ++++++------- .github/workflows/test-linux-gpu.yml | 64 ++++++++++++++++++++++++++++ .github/workflows/test-linux.yml | 2 + 3 files changed, 81 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/test-linux-gpu.yml diff --git a/.github/unittest.sh b/.github/unittest.sh index 98889286685..d64441dbcc9 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -67,18 +67,18 @@ if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then fi echo '::endgroup::' -echo '::group::Install TorchVision' -python setup.py develop -echo '::endgroup::' - -echo '::group::Collect PyTorch environment information' -python -m torch.utils.collect_env -echo '::endgroup::' - -echo '::group::Install testing utilities' -pip install --progress-bar=off pytest pytest-mock pytest-cov -echo '::endgroup::' - -echo '::group::Run tests' -pytest --durations=25 -echo '::endgroup::' +#echo '::group::Install TorchVision' +#python setup.py develop +#echo '::endgroup::' +# +#echo '::group::Collect PyTorch environment information' +#python -m torch.utils.collect_env +#echo '::endgroup::' +# +#echo '::group::Install testing utilities' +#pip install --progress-bar=off pytest pytest-mock pytest-cov +#echo '::endgroup::' +# +#echo '::group::Run tests' +#pytest --durations=25 +#echo '::endgroup::' diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml new file mode 100644 index 00000000000..b8c3d97d020 --- /dev/null +++ b/.github/workflows/test-linux-gpu.yml @@ -0,0 +1,64 @@ +name: Unit-tests on Linux GPU + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +env: + CHANNEL: "nightly" + +jobs: + tests: + strategy: + matrix: + python_version: ["3.8"] + cuda_arch_version: ["11.7"] + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.g5.4xlarge.nvidia.gpu + repository: pytorch/vision + gpu-arch-type: cuda + gpu-arch-version: ${{ matrix.cuda_arch_version }} + timeout: 120 + script: | + # Mark Build Directory Safe + git config --global --add safe.directory /__w/vision/vision + + # Set up Environment Variables + export PYTHON_VERSION="${{ matrix.python_version }}" + export VERSION="${{ matrix.cuda_arch_version }}" + export CUDATOOLKIT="pytorch-cuda=${VERSION}" + + # Set CHANNEL + if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then + export CHANNEL=test + else + export CHANNEL=nightly + fi + + # Create Conda Env + conda create -yp ci_env python="${PYTHON_VERSION}" numpy libpng jpeg scipy + conda activate /work/ci_env + + # Install PyTorch, Torchvision, and testing libraries + set -ex + conda install \ + --yes \ + -c "pytorch-${CHANNEL}" \ + -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ + "${CUDATOOLKIT}" + + sleep 7200 + +# python3 setup.py develop +# python3 -m pip install pytest pytest-mock 'av<10' +# +# # Run Tests +# python3 -m torch.utils.collect_env +# python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index f34ad299483..00f7bb617b6 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -36,3 +36,5 @@ jobs: export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} ./.github/unittest.sh + + sleep 7200 From 99d6332bf6ea52be67c0dbc8c94d6b408322a772 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 8 Feb 2023 14:44:46 +0100 Subject: [PATCH 03/19] disable CPU for now --- .github/workflows/test-linux.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index 00f7bb617b6..e3814d98328 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -13,12 +13,12 @@ jobs: tests: strategy: matrix: - python-version: - - "3.8" - - "3.9" - - "3.10" - runner: ["linux.12xlarge"] - gpu-arch-type: ["cpu"] +# python-version: +# - "3.8" +# - "3.9" +# - "3.10" +# runner: ["linux.12xlarge"] +# gpu-arch-type: ["cpu"] include: - python-version: 3.8 runner: linux.g5.4xlarge.nvidia.gpu From d81811d76d70ef062f229a4370aaa6ada7cda75e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 8 Feb 2023 22:45:53 +0100 Subject: [PATCH 04/19] sleep right away --- .github/workflows/test-linux-gpu.yml | 6 +++--- .github/workflows/test-linux.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml index b8c3d97d020..0f43b33fa30 100644 --- a/.github/workflows/test-linux-gpu.yml +++ b/.github/workflows/test-linux-gpu.yml @@ -27,6 +27,8 @@ jobs: gpu-arch-version: ${{ matrix.cuda_arch_version }} timeout: 120 script: | + sleep 7200 + # Mark Build Directory Safe git config --global --add safe.directory /__w/vision/vision @@ -53,9 +55,7 @@ jobs: -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - - sleep 7200 - + # python3 setup.py develop # python3 -m pip install pytest pytest-mock 'av<10' # diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index e3814d98328..7af0577337f 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -35,6 +35,6 @@ jobs: export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} - ./.github/unittest.sh - sleep 7200 + + ./.github/unittest.sh From c87f260ef6a7e90d2d8db82650647322cf3378b3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 8 Feb 2023 23:07:16 +0100 Subject: [PATCH 05/19] fix GPU workflow inputs --- .github/unittest.sh | 30 ++++++------- .github/workflows/test-linux-gpu.yml | 64 ---------------------------- .github/workflows/test-linux.yml | 16 +++---- 3 files changed, 23 insertions(+), 87 deletions(-) delete mode 100644 .github/workflows/test-linux-gpu.yml diff --git a/.github/unittest.sh b/.github/unittest.sh index d64441dbcc9..98889286685 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -67,18 +67,18 @@ if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then fi echo '::endgroup::' -#echo '::group::Install TorchVision' -#python setup.py develop -#echo '::endgroup::' -# -#echo '::group::Collect PyTorch environment information' -#python -m torch.utils.collect_env -#echo '::endgroup::' -# -#echo '::group::Install testing utilities' -#pip install --progress-bar=off pytest pytest-mock pytest-cov -#echo '::endgroup::' -# -#echo '::group::Run tests' -#pytest --durations=25 -#echo '::endgroup::' +echo '::group::Install TorchVision' +python setup.py develop +echo '::endgroup::' + +echo '::group::Collect PyTorch environment information' +python -m torch.utils.collect_env +echo '::endgroup::' + +echo '::group::Install testing utilities' +pip install --progress-bar=off pytest pytest-mock pytest-cov +echo '::endgroup::' + +echo '::group::Run tests' +pytest --durations=25 +echo '::endgroup::' diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml deleted file mode 100644 index 0f43b33fa30..00000000000 --- a/.github/workflows/test-linux-gpu.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Unit-tests on Linux GPU - -on: - pull_request: - push: - branches: - - nightly - - main - - release/* - workflow_dispatch: - -env: - CHANNEL: "nightly" - -jobs: - tests: - strategy: - matrix: - python_version: ["3.8"] - cuda_arch_version: ["11.7"] - fail-fast: false - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: - runner: linux.g5.4xlarge.nvidia.gpu - repository: pytorch/vision - gpu-arch-type: cuda - gpu-arch-version: ${{ matrix.cuda_arch_version }} - timeout: 120 - script: | - sleep 7200 - - # Mark Build Directory Safe - git config --global --add safe.directory /__w/vision/vision - - # Set up Environment Variables - export PYTHON_VERSION="${{ matrix.python_version }}" - export VERSION="${{ matrix.cuda_arch_version }}" - export CUDATOOLKIT="pytorch-cuda=${VERSION}" - - # Set CHANNEL - if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then - export CHANNEL=test - else - export CHANNEL=nightly - fi - - # Create Conda Env - conda create -yp ci_env python="${PYTHON_VERSION}" numpy libpng jpeg scipy - conda activate /work/ci_env - - # Install PyTorch, Torchvision, and testing libraries - set -ex - conda install \ - --yes \ - -c "pytorch-${CHANNEL}" \ - -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ - "${CUDATOOLKIT}" - -# python3 setup.py develop -# python3 -m pip install pytest pytest-mock 'av<10' -# -# # Run Tests -# python3 -m torch.utils.collect_env -# python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index 7af0577337f..88d14eee3e5 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -13,12 +13,12 @@ jobs: tests: strategy: matrix: -# python-version: -# - "3.8" -# - "3.9" -# - "3.10" -# runner: ["linux.12xlarge"] -# gpu-arch-type: ["cpu"] + python-version: + - "3.8" + - "3.9" + - "3.10" + runner: ["linux.12xlarge"] + gpu-arch-type: ["cpu"] include: - python-version: 3.8 runner: linux.g5.4xlarge.nvidia.gpu @@ -29,12 +29,12 @@ jobs: with: repository: pytorch/vision runner: ${{ matrix.runner }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} timeout: 120 script: | export PYTHON_VERSION=${{ matrix.python-version }} export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} - sleep 7200 - ./.github/unittest.sh From 11a2d6869e75e2038d292a87e2843e17f08beb1a Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 20 Feb 2023 15:39:00 +0100 Subject: [PATCH 06/19] refactor script --- .github/unittest.sh | 53 +++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 98889286685..1f6bf0423c4 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -5,41 +5,33 @@ set -euo pipefail echo '::group::Prepare conda' CONDA_PATH=$(which conda) eval "$(${CONDA_PATH} shell.bash hook)" -# The `setuptools` package installed through `conda` includes a patch that errors if something is installed -# through `setuptools` while the `CONDA_BUILD` environment variable is set. -# https://github.com/AnacondaRecipes/setuptools-feedstock/blob/f5d8d256810ce28fc0cf34170bc34e06d3754041/recipe/patches/0002-disable-downloads-inside-conda-build.patch -# (Although we are not using the `-c conda-forge` channel, the patch is equivalent but not public for -# `setuptools` from the `-c defaults` channel) -# Since we aren't using `conda build` here, we unset it to avoid installation problems later -# TODO: investigate where `CONDA_BUILD` is set and maybe fix unset it there -unset CONDA_BUILD echo '::endgroup::' -echo '::group::Set PyTorch conda channel' +echo '::group::Set PyTorch conda channel and wheel index' # TODO: Can we maybe have this as environment variable in the job template? For example, `IS_RELEASE`. if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then - POSTFIX=test + CHANNEL_ID=test else - POSTFIX=nightly + CHANNEL_ID=nightly fi -PYTORCH_CHANNEL=pytorch-"${POSTFIX}" -echo "${PYTORCH_CHANNEL}" -echo '::endgroup::' +PYTORCH_CONDA_CHANNEL=pytorch-"${CHANNEL_ID}" +echo "${PYTORCH_CONDA_CHANNEL}" -echo '::group::Set PyTorch GPU mutex' case $GPU_ARCH_TYPE in cpu) - PYTORCH_MUTEX=cpuonly + GPU_ARCH_ID="cpu" ;; cuda) - PYTORCH_MUTEX="pytorch-cuda=${GPU_ARCH_VERSION}" + VERSION_WITHOUT_DOT=$(echo "${GPU_ARCH_VERSION}" | sed 's/\.//') + GPU_ARCH_ID="cu${VERSION_WITHOUT_DOT}" ;; *) echo "Unknown GPU_ARCH_TYPE=${GPU_ARCH_TYPE}" exit 1 ;; esac -echo "${PYTORCH_MUTEX}" +PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${CHANNEL_ID}/${GPU_ARCH_ID}" +echo "${PYTORCH_WHEEL_INDEX}" echo '::endgroup::' echo '::group::Create build environment' @@ -47,20 +39,25 @@ conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ - setuptools ninja \ - libpng jpeg \ - Pillow numpy requests + ninja libpng jpeg \ + -c "${PYTORCH_CONDA_CHANNEL}" \ + -c conda-forge conda activate ci -pip install 'av<10' +pip install --progress-bar=off --upgrade setuptools echo '::endgroup::' echo '::group::Install PyTorch' -conda install \ - --quiet --yes \ - -c "${PYTORCH_CHANNEL}" \ - -c nvidia \ - pytorch \ - "${PYTORCH_MUTEX}" +# Due to the supply chain attack in Dec 2022 (https://pytorch.org/blog/compromised-nightly-dependency/), we host all +# third-party dependencies on Linux on our own indices and *don't* install them from PyPI. +case "$(uname -s)" in + Linux*) + INDEX_TYPE="index-url" + ;; + *) + INDEX_TYPE="extra-index-url" +esac + +pip install --progress-bar=off torch "--${INDEX_TYPE}=${PYTORCH_WHEEL_INDEX}" if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then python3 -c "import torch; exit(not torch.cuda.is_available())" From de913f912f21c3d53818bf2ef35b0c46860b9958 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 20 Feb 2023 16:05:10 +0100 Subject: [PATCH 07/19] cleanup --- .github/unittest.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 1f6bf0423c4..1b2feb83fde 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -2,10 +2,9 @@ set -euo pipefail -echo '::group::Prepare conda' +# Prepare conda CONDA_PATH=$(which conda) eval "$(${CONDA_PATH} shell.bash hook)" -echo '::endgroup::' echo '::group::Set PyTorch conda channel and wheel index' # TODO: Can we maybe have this as environment variable in the job template? For example, `IS_RELEASE`. @@ -15,7 +14,7 @@ else CHANNEL_ID=nightly fi PYTORCH_CONDA_CHANNEL=pytorch-"${CHANNEL_ID}" -echo "${PYTORCH_CONDA_CHANNEL}" +echo "PYTORCH_CONDA_CHANNEL=${PYTORCH_CONDA_CHANNEL}" case $GPU_ARCH_TYPE in cpu) @@ -31,7 +30,7 @@ case $GPU_ARCH_TYPE in ;; esac PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${CHANNEL_ID}/${GPU_ARCH_ID}" -echo "${PYTORCH_WHEEL_INDEX}" +echo "PYTORCH_WHEEL_INDEX=${PYTORCH_WHEEL_INDEX}" echo '::endgroup::' echo '::group::Create build environment' From 69315776c00e15b15eee77f38fcd45f3b4e9ca7f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 20 Feb 2023 16:08:06 +0100 Subject: [PATCH 08/19] also install ffmpeg --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 1b2feb83fde..b0a6cc37d95 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -38,7 +38,7 @@ conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ - ninja libpng jpeg \ + ninja libpng jpeg ffmpeg \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci From 701e71fe93b77584a1534cd4f22f0c034d14edb2 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Feb 2023 09:47:33 +0100 Subject: [PATCH 09/19] try ffmpeg<5 --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index b0a6cc37d95..538b59de9e7 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -38,7 +38,7 @@ conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ - ninja libpng jpeg ffmpeg \ + ninja libpng jpeg 'ffmpeg<5' \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci From 1c8659330ad4624d92a19e843cdf839341ab2dae Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Feb 2023 10:27:42 +0100 Subject: [PATCH 10/19] try ffmpeg<4.3 --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 538b59de9e7..4ead432a715 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -38,7 +38,7 @@ conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ - ninja libpng jpeg 'ffmpeg<5' \ + ninja libpng jpeg 'ffmpeg<4.3' \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci From 7b22e3d96cc31b3829f21d469a97f5512ff3b676 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Feb 2023 11:12:59 +0100 Subject: [PATCH 11/19] Update .github/unittest.sh --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 4ead432a715..6376675131b 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -42,7 +42,7 @@ conda create \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci -pip install --progress-bar=off --upgrade setuptools +pip install --progress-bar=off --upgrade setuptools av!=10.0.0 echo '::endgroup::' echo '::group::Install PyTorch' From 62dd31aff7babb2de6305257960849f297689e35 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 11:49:15 +0100 Subject: [PATCH 12/19] try 3.11 on linux --- .github/workflows/test-linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml index 88d14eee3e5..133eaf606eb 100644 --- a/.github/workflows/test-linux.yml +++ b/.github/workflows/test-linux.yml @@ -17,6 +17,7 @@ jobs: - "3.8" - "3.9" - "3.10" + - "3.11" runner: ["linux.12xlarge"] gpu-arch-type: ["cpu"] include: From d244e1f41d6b1487e3731ad535ea92b5c339931d Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 13:54:07 +0100 Subject: [PATCH 13/19] only try to install av on < 3.11 --- .github/unittest.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 6376675131b..ba1fd866ce1 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -42,7 +42,13 @@ conda create \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci -pip install --progress-bar=off --upgrade setuptools av!=10.0.0 +pip install --progress-bar=off --upgrade setuptools + +# See https://github.com/pytorch/vision/issues/6790 +if [[ "${PYTHON_VERSION}" != "3.11" ]] + pip install --progress-bar=off av!=10.0.0 +fi + echo '::endgroup::' echo '::group::Install PyTorch' From d5ea9ee066f2eab0d71caf6ccd1140d2dedfe43e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 14:00:04 +0100 Subject: [PATCH 14/19] add comment for ffmpeg pin --- .github/unittest.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index ba1fd866ce1..465c17a5e84 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -38,7 +38,8 @@ conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ - ninja libpng jpeg 'ffmpeg<4.3' \ + ninja libpng jpeg \ + 'ffmpeg<4.3' # See https://github.com/pytorch/vision/issues/7296 \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci From 2cbba69395f379a50c684764187787ca2a4e716b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 14:01:14 +0100 Subject: [PATCH 15/19] add missing --pre --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 465c17a5e84..653857124af 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -63,7 +63,7 @@ case "$(uname -s)" in INDEX_TYPE="extra-index-url" esac -pip install --progress-bar=off torch "--${INDEX_TYPE}=${PYTORCH_WHEEL_INDEX}" +pip install --progress-bar=off --pre torch "--${INDEX_TYPE}=${PYTORCH_WHEEL_INDEX}" if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then python3 -c "import torch; exit(not torch.cuda.is_available())" From 084983885adc47b351abe454aa2a998b9fe44ec7 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 14:29:23 +0100 Subject: [PATCH 16/19] fix bash multiline comment --- .github/unittest.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 653857124af..a494be93364 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -34,12 +34,13 @@ echo "PYTORCH_WHEEL_INDEX=${PYTORCH_WHEEL_INDEX}" echo '::endgroup::' echo '::group::Create build environment' +# See https://github.com/pytorch/vision/issues/7296 for ffmpeg conda create \ --name ci \ --quiet --yes \ python="${PYTHON_VERSION}" pip \ ninja libpng jpeg \ - 'ffmpeg<4.3' # See https://github.com/pytorch/vision/issues/7296 \ + 'ffmpeg<4.3' \ -c "${PYTORCH_CONDA_CHANNEL}" \ -c conda-forge conda activate ci From d0c3ec9819a03a74bf5e3e5ab1b0dc331c747fc8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 14:46:24 +0100 Subject: [PATCH 17/19] fix conditional --- .github/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index a494be93364..f7e3dd2127f 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -47,7 +47,7 @@ conda activate ci pip install --progress-bar=off --upgrade setuptools # See https://github.com/pytorch/vision/issues/6790 -if [[ "${PYTHON_VERSION}" != "3.11" ]] +if [[ "${PYTHON_VERSION}" != "3.11" ]]; then pip install --progress-bar=off av!=10.0.0 fi From 2819673b1e41bed46f2473c6454c9efcee7a8daf Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 27 Feb 2023 15:15:28 +0100 Subject: [PATCH 18/19] fix PyTorch installation --- .github/unittest.sh | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index f7e3dd2127f..2a0b2b2d6d9 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -54,19 +54,9 @@ fi echo '::endgroup::' echo '::group::Install PyTorch' -# Due to the supply chain attack in Dec 2022 (https://pytorch.org/blog/compromised-nightly-dependency/), we host all -# third-party dependencies on Linux on our own indices and *don't* install them from PyPI. -case "$(uname -s)" in - Linux*) - INDEX_TYPE="index-url" - ;; - *) - INDEX_TYPE="extra-index-url" -esac - -pip install --progress-bar=off --pre torch "--${INDEX_TYPE}=${PYTORCH_WHEEL_INDEX}" +pip install --progress-bar=off --pre torch --index-url="${PYTORCH_WHEEL_INDEX}" -if [[ $GPU_ARCH_TYPE = 'cuda' ]]; then +if [[ $GPU_ARCH_TYPE == 'cuda' ]]; then python3 -c "import torch; exit(not torch.cuda.is_available())" fi echo '::endgroup::' From 8e10b25b71be0201e018e7ee6b4492b160959fed Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 1 Mar 2023 21:40:05 +0100 Subject: [PATCH 19/19] use defaults channel over conda-forge --- .github/unittest.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/unittest.sh b/.github/unittest.sh index 2a0b2b2d6d9..0109a5cc2b1 100755 --- a/.github/unittest.sh +++ b/.github/unittest.sh @@ -5,6 +5,7 @@ set -euo pipefail # Prepare conda CONDA_PATH=$(which conda) eval "$(${CONDA_PATH} shell.bash hook)" +conda config --set channel_priority strict echo '::group::Set PyTorch conda channel and wheel index' # TODO: Can we maybe have this as environment variable in the job template? For example, `IS_RELEASE`. @@ -42,7 +43,7 @@ conda create \ ninja libpng jpeg \ 'ffmpeg<4.3' \ -c "${PYTORCH_CONDA_CHANNEL}" \ - -c conda-forge + -c defaults conda activate ci pip install --progress-bar=off --upgrade setuptools