diff --git a/.github/approve_config.yml b/.github/approve_config.yml index 76e83beb1aa39..365f2a89e23c2 100644 --- a/.github/approve_config.yml +++ b/.github/approve_config.yml @@ -3,7 +3,7 @@ approvals: minimum: 1 groups: - name: 'PyTorch Lightning' - minimum: 0 + minimum: 1 from: - awaelchli - Borda @@ -16,7 +16,8 @@ approvals: - tchaton - williamFalcon - name: 'Lightning Apps' - minimum: 0 + minimum: 1 + from: - alecmerdler - awaelchli - hhsecond diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a7a957a2a1db4..ab67c9026b55b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -17,8 +17,25 @@ updates: # Separate sections of the branch name with a hyphen # for example, `dependabot-npm_and_yarn-next_js-acorn-6.4.1` separator: "-" - # Allow up to 10 open pull requests for pip dependencies + # Allow up to 5 open pull requests for pip dependencies + open-pull-requests-limit: 5 + reviewers: + - "Lightning-AI/teams/core-lightning" + + # Enable version updates for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + # Check for updates once a month + schedule: + interval: "monthly" + # Labels on pull requests for version updates only + labels: + - "ci" + pull-request-branch-name: + # Separate sections of the branch name with a hyphen + # for example, `dependabot-npm_and_yarn-next_js-acorn-6.4.1` + separator: "-" + # Allow up to 5 open pull requests for GitHub Actions open-pull-requests-limit: 5 reviewers: - - "carmocca" - "Lightning-AI/teams/core-lightning" diff --git a/.github/workflows/ci-app_examples.yml b/.github/workflows/ci-app_examples.yml index 30d29a853597e..0cbdc387d13c6 100644 --- a/.github/workflows/ci-app_examples.yml +++ b/.github/workflows/ci-app_examples.yml @@ -93,7 +93,7 @@ jobs: coverage run --source lightning_app -m pytest -m "not cloud" tests_app_examples --timeout=300 -vvvv --junitxml=$PYTEST_ARTIFACT --durations=0 - name: Upload pytest test results - uses: actions/upload-artifact@v2 + uses: 
actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: tests/results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml @@ -107,7 +107,7 @@ jobs: coverage report -i - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: tests/coverage.xml diff --git a/.github/workflows/ci-app_tests.yml b/.github/workflows/ci-app_tests.yml index 3993c31afab0c..ae8416634a2cb 100644 --- a/.github/workflows/ci-app_tests.yml +++ b/.github/workflows/ci-app_tests.yml @@ -100,7 +100,7 @@ jobs: coverage run --source lightning_app -m pytest -m "not cloud" tests_app --timeout=300 -vvvv --junitxml=$PYTEST_ARTIFACT --durations=0 - name: Upload pytest test results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: tests/results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml @@ -114,7 +114,7 @@ jobs: coverage report -i - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} file: tests/coverage.xml diff --git a/.github/workflows/ci-pytorch_dockers.yml b/.github/workflows/ci-pytorch_dockers.yml deleted file mode 100644 index 69d5955c5db33..0000000000000 --- a/.github/workflows/ci-pytorch_dockers.yml +++ /dev/null @@ -1,183 +0,0 @@ -name: Docker - PyTorch -# https://www.docker.com/blog/first-docker-github-action-is-here -# https://github.com/docker/build-push-action -# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows -on: # Trigger the workflow on push or pull request, but only for the master branch - push: - branches: [master, "release/*"] # include release branches like release/1.0.x - pull_request: - branches: [master, "release/*"] - paths: 
- - "!src/lightning_app/**" # todo: implement job skip - - "!tests/tests_app/**" # todo: implement job skip - - "!tests/tests_app_examples/**" # todo: implement job skip - - "!examples/app_*" # todo: implement job skip - - "dockers/**" - - "!dockers/README.md" - - "requirements/*" - - "requirements.txt" - - "environment.yml" - - ".github/workflows/*docker*.yml" - - ".github/workflows/events-nightly.yml" - - "setup.py" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} - cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} - -jobs: - build-PL: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image - python_version: ["3.9"] - pytorch_version: ["1.10", "1.11"] - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Build PL Docker - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/release/Dockerfile - push: false - timeout-minutes: 50 - - build-XLA: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in '.circleci/config.yml`' - python_version: ["3.7"] - xla_version: ["1.11"] - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Build XLA Docker - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - XLA_VERSION=${{ matrix.xla_version }} - file: dockers/base-xla/Dockerfile - push: false - timeout-minutes: 60 - - build-CUDA: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - include: - # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.7", 
pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} - # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Build CUDA Docker - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - UBUNTU_VERSION=${{ matrix.ubuntu_version }} - file: dockers/base-cuda/Dockerfile - push: false - timeout-minutes: 95 - - build-Conda: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - include: - # see: https://pytorch.org/get-started/previous-versions/ - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Build Conda Docker - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - file: dockers/base-conda/Dockerfile - push: false - timeout-minutes: 95 - - build-ipu: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in 'dockers/ci-runner-ipu/Dockerfile' - python_version: ["3.9"] # latest - pytorch_version: ["1.9"] - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Build IPU Docker - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - 
PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/base-ipu/Dockerfile - push: false - tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 50 - - - name: Build IPU CI runner Docker - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/ci-runner-ipu/Dockerfile - push: false - timeout-minutes: 60 - - build-hpu: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in 'dockers/ci-runner-hpu/Dockerfile' - gaudi_version: ["1.5.0"] - pytorch_version: ["1.11.0"] - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Build HPU CI runner Docker - uses: docker/build-push-action@v2 - with: - build-args: | - DIST=latest - GAUDI_VERSION=${{ matrix.gaudi_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/ci-runner-hpu/Dockerfile - push: false - timeout-minutes: 60 diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index c062e6e02acb1..43cf73052b757 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -79,7 +79,7 @@ jobs: run: coverage run --source pytorch_lightning -m pytest -v --timeout 150 --durations=50 --junitxml=results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml - name: Upload pytest results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-torch${{ matrix.pytorch-version }} path: tests/tests_pytorch/results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml @@ -93,7 +93,7 @@ jobs: coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 if: always() # see: https://github.com/actions/toolkit/issues/399 continue-on-error: true diff --git 
a/.github/workflows/ci-pytorch_test-full.yml b/.github/workflows/ci-pytorch_test-full.yml index 42ec2b71fd0b6..2db59a30f5b64 100644 --- a/.github/workflows/ci-pytorch_test-full.yml +++ b/.github/workflows/ci-pytorch_test-full.yml @@ -136,7 +136,7 @@ jobs: run: coverage run --source pytorch_lightning -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml - name: Upload pytest results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }} path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml @@ -160,7 +160,7 @@ jobs: coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 if: always() # see: https://github.com/actions/toolkit/issues/399 continue-on-error: true diff --git a/.github/workflows/ci-pytorch_test-slow.yml b/.github/workflows/ci-pytorch_test-slow.yml index 279c4ffe772a8..79cede5b59b8c 100644 --- a/.github/workflows/ci-pytorch_test-slow.yml +++ b/.github/workflows/ci-pytorch_test-slow.yml @@ -70,7 +70,7 @@ jobs: PL_RUN_SLOW_TESTS: 1 - name: Upload pytest test results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }} path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}.xml @@ -84,7 +84,7 @@ jobs: coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 if: success() # see: https://github.com/actions/toolkit/issues/399 continue-on-error: true diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml new file mode 100644 index 0000000000000..c1f300d0ac996 --- /dev/null +++ 
b/.github/workflows/cicd-pytorch_dockers.yml @@ -0,0 +1,242 @@ +name: Docker + +on: + push: + branches: [master, "release/*"] + pull_request: + branches: [master, "release/*"] + paths: + - "dockers/**" + - "!dockers/README.md" + - "requirements/*" + - "requirements.txt" + - "environment.yml" + - ".github/workflows/*docker*.yml" + - ".github/workflows/events-nightly.yml" + - "setup.py" + schedule: + - cron: "0 0 * * *" # at the end of every day + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }} + cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} + +env: + PUSH_TO_HUB: ${{ github.event_name == 'schedule' }} + +jobs: + build-pl: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image + python_version: ["3.9"] + pytorch_version: ["1.10", "1.11"] + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + file: dockers/release/Dockerfile + push: false # pushed in release-docker.yml only when PL is released + timeout-minutes: 50 + + build-xla: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + # the config used in '.circleci/config.yml`' + python_version: ["3.7"] + xla_version: ["1.11"] + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v1 + if: env.PUSH_TO_HUB == 'true' + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + XLA_VERSION=${{ matrix.xla_version }} + file: dockers/base-xla/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: 
pytorchlightning/pytorch_lightning:base-xla-py${{ matrix.python_version }}-torch${{ matrix.xla_version }} + timeout-minutes: 60 + - uses: ravsamhq/notify-slack-action@v1 + if: failure() && env.PUSH_TO_HUB == 'true' + with: + status: ${{ job.status }} + token: ${{ secrets.GITHUB_TOKEN }} + notification_title: ${{ format('XLA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.xla_version) }} + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01GD29QCAV>' # kaushikb11 + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + + build-cuda: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + include: + # the config used in '.azure-pipelines/gpu-tests.yml' + - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} + - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} + # latest (used in Tutorials) + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v1 + if: env.PUSH_TO_HUB == 'true' + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + UBUNTU_VERSION=${{ matrix.ubuntu_version }} + file: dockers/base-cuda/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + timeout-minutes: 95 + - uses: ravsamhq/notify-slack-action@v1 + 
if: failure() && env.PUSH_TO_HUB == 'true' + with: + status: ${{ job.status }} + token: ${{ secrets.GITHUB_TOKEN }} + notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>' # akihironitta + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + + build-conda: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + include: + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + # nightly: add when there's a release candidate + # - {python_version: "3.9", pytorch_version: "1.12"} + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v1 + if: env.PUSH_TO_HUB == 'true' + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + file: dockers/base-conda/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + timeout-minutes: 95 + - uses: ravsamhq/notify-slack-action@v1 + if: failure() && env.PUSH_TO_HUB == 'true' + with: + status: ${{ job.status }} + token: ${{ secrets.GITHUB_TOKEN }} + notification_title: ${{ format('Conda; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>' # akihironitta + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + + build-ipu: + runs-on: ubuntu-20.04 + 
strategy: + fail-fast: false + matrix: + include: + # the config used in 'dockers/ci-runner-ipu/Dockerfile' + - {python_version: "3.9", pytorch_version: "1.9"} + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v1 + if: env.PUSH_TO_HUB == 'true' + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + file: dockers/base-ipu/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + timeout-minutes: 100 + - uses: docker/build-push-action@v2 + with: + build-args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + file: dockers/ci-runner-ipu/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: pytorchlightning/pytorch_lightning:ipu-ci-runner-py${{ matrix.python_version }} + timeout-minutes: 10 + - uses: ravsamhq/notify-slack-action@v1 + if: failure() && env.PUSH_TO_HUB == 'true' + with: + status: ${{ job.status }} + token: ${{ secrets.GITHUB_TOKEN }} + notification_title: ${{ format('IPU; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01BULUS2BG>' # SeanNaren + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + + build-hpu: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + include: + # the config used in 'dockers/ci-runner-hpu/Dockerfile' + - {gaudi_version: "1.5.0", pytorch_version: "1.11.0"} + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-buildx-action@v2 + - uses: docker/login-action@v1 + if: env.PUSH_TO_HUB == 'true' + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} 
+ - uses: docker/build-push-action@v2 + with: + build-args: | + DIST=latest + GAUDI_VERSION=${{ matrix.gaudi_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + file: dockers/ci-runner-hpu/Dockerfile + push: ${{ env.PUSH_TO_HUB }} + tags: pytorchlightning/pytorch_lightning:hpu-ci-runner-gaudi${{ matrix.gaudi_version }} + timeout-minutes: 10 + - uses: ravsamhq/notify-slack-action@v1 + if: failure() && env.PUSH_TO_HUB == 'true' + with: + status: ${{ job.status }} + token: ${{ secrets.GITHUB_TOKEN }} + notification_title: ${{ format('HPU; {0} py{1} for *{2}*', runner.os, matrix.gaudi_version, matrix.pytorch_version) }} + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U02PV6CL144> <@U0355SJN6HK>' # arao & Mythravarun N R + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index 25a9b17d6914b..eadf11a56801d 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -101,7 +101,7 @@ jobs: make html --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going" - name: Upload built docs - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: docs-results-${{ github.sha }} path: docs/build/html/ diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index 0325671413dbb..9279b95c767ec 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -7,9 +7,6 @@ on: # At the end of every day - cron: "0 0 * * *" -env: - PUSH_TO_HUB: true - # based on https://github.com/pypa/gh-action-pypi-publish jobs: pypi-release: @@ -59,258 +56,3 @@ jobs: message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@UR9FXE6QG>' #Borda env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - docker-XLA: - if: ${{ github.repository_owner == 'Lightning-AI' }} - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - 
matrix: - # the config used in '.circleci/config.yml`' - python_version: ["3.7"] - xla_version: ["1.8"] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - # https://github.com/docker/setup-buildx-action - # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish XLA to Docker Hub - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - XLA_VERSION=${{ matrix.xla_version }} - file: dockers/base-xla/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-xla-py${{ matrix.python_version }}-torch${{ matrix.xla_version }} - timeout-minutes: 55 - - # report failure to Slack - - name: Slack notification - if: failure() && github.event_name == 'schedule' - uses: ravsamhq/notify-slack-action@v1 - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('XLA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.xla_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01GD29QCAV>' #kaushikb11 - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - docker-CUDA: - if: ${{ github.repository_owner == 'Lightning-AI' }} - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - include: - # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} - # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", 
pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} - steps: - - name: Checkout - uses: actions/checkout@v2 - - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish CUDA to Docker Hub - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - UBUNTU_VERSION=${{ matrix.ubuntu_version }} - file: dockers/base-cuda/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 95 - - # report failure to Slack - - name: Slack notification - if: failure() && github.event_name == 'schedule' - uses: ravsamhq/notify-slack-action@v1 - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>' #akihironitta - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - docker-Conda: - if: ${{ github.repository_owner == 'Lightning-AI' }} - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - include: - # see: https://pytorch.org/get-started/previous-versions/ - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - # nightly: add when there's a release candidate - # - {python_version: "3.9", pytorch_version: 
"1.12"} - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish Conda to Docker Hub - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - file: dockers/base-conda/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 95 - - # report failure to Slack - - name: Slack notification - if: failure() && github.event_name == 'schedule' - uses: ravsamhq/notify-slack-action@v1 - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('Conda; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>' #akihironitta - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - docker-IPU: - if: ${{ github.repository_owner == 'Lightning-AI' }} - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in 'dockers/ci-runner-ipu/Dockerfile' - include: - - {python_version: "3.9", pytorch_version: "1.9"} - - steps: - - name: Checkout - uses: actions/checkout@v2 - - # https://github.com/docker/setup-buildx-action - # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish IPU base to Docker Hub - # publish master/release - uses: 
docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/base-ipu/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} - timeout-minutes: 55 - - - name: Publish IPU CI runner to Docker Hub - # publish master/release - uses: docker/build-push-action@v2 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/ci-runner-ipu/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:ipu-ci-runner-py${{ matrix.python_version }} - timeout-minutes: 55 - - # report failure to Slack - - name: Slack notification - if: failure() && github.event_name == 'schedule' - uses: ravsamhq/notify-slack-action@v1 - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('IPU; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01BULUS2BG>' #SeanNaren - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - docker-HPU: - if: ${{ github.repository_owner == 'Lightning-AI' }} - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - # the config used in 'dockers/ci-runner-hpu/Dockerfile' - include: - - {gaudi_version: "1.5.0", pytorch_version: "1.11.0"} - - steps: - - name: Checkout - uses: actions/checkout@v2 - - # https://github.com/docker/setup-buildx-action - # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish HPU CI runner to Docker Hub - # publish 
master/release - uses: docker/build-push-action@v2 - with: - build-args: | - DIST=latest - GAUDI_VERSION=${{ matrix.gaudi_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - file: dockers/ci-runner-hpu/Dockerfile - push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:hpu-ci-runner-gaudi${{ matrix.gaudi_version }} - timeout-minutes: 55 - - # report failure to Slack - - name: Slack notification - if: failure() && github.event_name == 'schedule' - uses: ravsamhq/notify-slack-action@v1 - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('HPU; {0} py{1} for *{2}*', runner.os, matrix.gaudi_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U02PV6CL144> <@U0355SJN6HK>' #arao & Mythravarun N R - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index fe060c10cb980..db5a4fcffb927 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -12,4 +12,3 @@ jobs: - uses: actions/labeler@v4 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" - sync-labels: true diff --git a/docs/source-pytorch/common/lightning_module.rst b/docs/source-pytorch/common/lightning_module.rst index 636777ec7e9e5..bf774b02a2f8a 100644 --- a/docs/source-pytorch/common/lightning_module.rst +++ b/docs/source-pytorch/common/lightning_module.rst @@ -1626,15 +1626,3 @@ on_after_batch_transfer .. automethod:: pytorch_lightning.core.module.LightningModule.on_after_batch_transfer :noindex: - -add_to_queue -~~~~~~~~~~~~ - -.. automethod:: pytorch_lightning.core.module.LightningModule.add_to_queue - :noindex: - -get_from_queue -~~~~~~~~~~~~~~ - -.. 
automethod:: pytorch_lightning.core.module.LightningModule.get_from_queue - :noindex: diff --git a/pyproject.toml b/pyproject.toml index 770f0983c3139..055677a04e977 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,9 +59,7 @@ module = [ "pytorch_lightning.demos.boring_classes", "pytorch_lightning.demos.mnist_datamodule", "pytorch_lightning.distributed.dist", - "pytorch_lightning.loggers.base", "pytorch_lightning.loggers.comet", - "pytorch_lightning.loggers.csv_logs", "pytorch_lightning.loggers.mlflow", "pytorch_lightning.loggers.neptune", "pytorch_lightning.loggers.tensorboard", @@ -90,7 +88,6 @@ module = [ "pytorch_lightning.trainer.supporters", "pytorch_lightning.trainer.trainer", "pytorch_lightning.tuner.batch_size_scaling", - "pytorch_lightning.tuner.tuning", "pytorch_lightning.utilities.auto_restart", "pytorch_lightning.utilities.data", "pytorch_lightning.utilities.distributed", diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 0d42f819d9d1d..9b3cefc66bd19 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -174,6 +174,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Removed +- Removed deprecated `IndexBatchSamplerWrapper.batch_indices` ([#13565](https://github.com/PyTorchLightning/pytorch-lightning/pull/13565)) + + +- Removed the deprecated `LightningModule.add_to_queue` and `LightningModule.get_from_queue` methods ([#13600](https://github.com/PyTorchLightning/pytorch-lightning/pull/13600)) + + - Removed deprecated `pytorch_lightning.core.decorators.parameter_validation` from `decorators` ([#13514](https://github.com/Lightning-AI/lightning/pull/13514)) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index ef4a869b3c502..022f7ab678e78 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1955,28 +1955,6 @@ def use_amp(self, use_amp: bool) -> None: ) self._use_amp = use_amp - def add_to_queue(self, queue: pl.strategies.launchers.spawn._FakeQueue) -> None: - """Appends the :attr:`trainer.callback_metrics` dictionary to the given queue. To avoid issues with memory - sharing, we cast the data to numpy. - - Args: - queue: the instance of the queue to append the data. - - .. deprecated:: v1.5 - This method was deprecated in v1.5 and will be removed in v1.7. - """ - - def get_from_queue(self, queue: pl.strategies.launchers.spawn._FakeQueue) -> None: - """Retrieve the :attr:`trainer.callback_metrics` dictionary from the given queue. To preserve consistency, - we cast back the data to ``torch.Tensor``. - - Args: - queue: the instance of the queue from where to get the data. - - .. deprecated:: v1.5 - This method was deprecated in v1.5 and will be removed in v1.7. 
- """ - @contextmanager def _prevent_trainer_and_dataloaders_deepcopy(self) -> None: self._should_prevent_trainer_and_dataloaders_deepcopy = True diff --git a/src/pytorch_lightning/loggers/base.py b/src/pytorch_lightning/loggers/base.py index 1da0749e460fe..43c572e3953c0 100644 --- a/src/pytorch_lightning/loggers/base.py +++ b/src/pytorch_lightning/loggers/base.py @@ -12,16 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Callable, Dict, Mapping, Optional, Sequence + +import numpy as np + import pytorch_lightning.loggers.logger as logger from pytorch_lightning.utilities.warnings import rank_zero_deprecation -def rank_zero_experiment(*args, **kwargs) -> None: # type: ignore[no-untyped-def] +def rank_zero_experiment(fn: Callable) -> Callable: rank_zero_deprecation( "The `pytorch_lightning.loggers.base.rank_zero_experiment` is deprecated in v1.7" " and will be removed in v1.9. Please use `pytorch_lightning.loggers.logger.rank_zero_experiment` instead." ) - return logger.rank_zero_experiment(*args, **kwargs) + return logger.rank_zero_experiment(fn) class LightningLoggerBase(logger.Logger): @@ -77,9 +81,13 @@ def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] super().__init__(*args, **kwargs) -def merge_dicts(*args, **kwargs) -> None: # type: ignore[no-untyped-def] +def merge_dicts( + dicts: Sequence[Mapping], + agg_key_funcs: Optional[Mapping] = None, + default_func: Callable[[Sequence[float]], float] = np.mean, +) -> Dict: rank_zero_deprecation( "The `pytorch_lightning.loggers.base.merge_dicts` is deprecated in v1.7" " and will be removed in v1.9. Please use `pytorch_lightning.loggers.logger.merge_dicts` instead." 
) - return logger.merge_dicts(*args, **kwargs) + return logger.merge_dicts(dicts=dicts, agg_key_funcs=agg_key_funcs, default_func=default_func) diff --git a/src/pytorch_lightning/loggers/csv_logs.py b/src/pytorch_lightning/loggers/csv_logs.py index 3316a5e86e64b..72d21ae2c4974 100644 --- a/src/pytorch_lightning/loggers/csv_logs.py +++ b/src/pytorch_lightning/loggers/csv_logs.py @@ -22,7 +22,7 @@ import logging import os from argparse import Namespace -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union from torch import Tensor @@ -49,8 +49,8 @@ class ExperimentWriter: NAME_METRICS_FILE = "metrics.csv" def __init__(self, log_dir: str) -> None: - self.hparams = {} - self.metrics = [] + self.hparams: Dict[str, Any] = {} + self.metrics: List[Dict[str, float]] = [] self.log_dir = log_dir if os.path.exists(self.log_dir) and os.listdir(self.log_dir): @@ -69,7 +69,7 @@ def log_hparams(self, params: Dict[str, Any]) -> None: def log_metrics(self, metrics_dict: Dict[str, float], step: Optional[int] = None) -> None: """Record metrics.""" - def _handle_value(value): + def _handle_value(value: Union[Tensor, Any]) -> Any: if isinstance(value, Tensor): return value.item() return value @@ -126,7 +126,7 @@ class CSVLogger(Logger): def __init__( self, save_dir: str, - name: Optional[str] = "lightning_logs", + name: str = "lightning_logs", version: Optional[Union[int, str]] = None, prefix: str = "", flush_logs_every_n_steps: int = 100, @@ -136,7 +136,7 @@ def __init__( self._name = name or "" self._version = version self._prefix = prefix - self._experiment = None + self._experiment: Optional[ExperimentWriter] = None self._flush_logs_every_n_steps = flush_logs_every_n_steps @property @@ -161,7 +161,7 @@ def log_dir(self) -> str: return log_dir @property - def save_dir(self) -> Optional[str]: + def save_dir(self) -> str: """The current directory where logs are saved. 
Returns: @@ -169,7 +169,7 @@ def save_dir(self) -> Optional[str]: """ return self._save_dir - @property + @property # type: ignore[misc] @rank_zero_experiment def experiment(self) -> ExperimentWriter: r""" @@ -182,7 +182,7 @@ def experiment(self) -> ExperimentWriter: self.logger.experiment.some_experiment_writer_function() """ - if self._experiment: + if self._experiment is not None: return self._experiment os.makedirs(self.root_dir, exist_ok=True) @@ -220,7 +220,7 @@ def name(self) -> str: return self._name @property - def version(self) -> int: + def version(self) -> Union[int, str]: """Gets the version of the experiment. Returns: @@ -230,7 +230,7 @@ def version(self) -> int: self._version = self._get_next_version() return self._version - def _get_next_version(self): + def _get_next_version(self) -> int: root_dir = self.root_dir if not os.path.isdir(root_dir): diff --git a/src/pytorch_lightning/loggers/logger.py b/src/pytorch_lightning/loggers/logger.py index 4113b61627d8f..03d934aa58760 100644 --- a/src/pytorch_lightning/loggers/logger.py +++ b/src/pytorch_lightning/loggers/logger.py @@ -38,13 +38,13 @@ def rank_zero_experiment(fn: Callable) -> Callable: def experiment(self) -> Union[Any, DummyExperiment]: # type: ignore[no-untyped-def] """ Note: - `self` is a custom logger instance. The loggers typical wrap an `experiment` method - with a @rank_zero_experiment decorator. An exception being `loggers.neptune` wraps - `experiment` and `run` with rank_zero_experiment. + ``self`` is a custom logger instance. The loggers typically wrap an ``experiment`` method + with a ``@rank_zero_experiment`` decorator. An exception is that ``loggers.neptune`` wraps + ``experiment`` and ``run`` with rank_zero_experiment. - Union[Any, DummyExperiment] is used because the wrapped hooks have several returns - types that are specific to the custom logger. 
The return type can be considered as - Union[return type of logger.experiment, DummyExperiment] + ``Union[Any, DummyExperiment]`` is used because the wrapped hooks have several return + types that are specific to the custom logger. The return type here can be considered as + ``Union[return type of logger.experiment, DummyExperiment]``. """ @rank_zero_only diff --git a/src/pytorch_lightning/overrides/distributed.py b/src/pytorch_lightning/overrides/distributed.py index 15a8632af938b..8048d83252af7 100644 --- a/src/pytorch_lightning/overrides/distributed.py +++ b/src/pytorch_lightning/overrides/distributed.py @@ -20,7 +20,6 @@ from torch.utils.data import BatchSampler, Dataset, DistributedSampler, Sampler from pytorch_lightning.overrides.base import _LightningModuleWrapperBase -from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -176,28 +175,10 @@ class IndexBatchSamplerWrapper: def __init__(self, sampler: BatchSampler) -> None: self.seen_batch_indices: List[List[int]] = [] self._sampler = sampler - self._batch_indices: List[int] = [] - - @property - def batch_indices(self) -> List[int]: - rank_zero_deprecation( - "The attribute `IndexBatchSamplerWrapper.batch_indices` was deprecated in v1.5 and will be removed in" - " v1.7. Access the full list `seen_batch_indices` instead." - ) - return self._batch_indices - - @batch_indices.setter - def batch_indices(self, indices: List[int]) -> None: - rank_zero_deprecation( - "The attribute `IndexBatchSamplerWrapper.batch_indices` was deprecated in v1.5 and will be removed in" - " v1.7. Access the full list `seen_batch_indices` instead." 
- ) - self._batch_indices = indices def __iter__(self) -> Iterator[List[int]]: self.seen_batch_indices = [] for batch in self._sampler: - self._batch_indices = batch self.seen_batch_indices.append(batch) yield batch diff --git a/src/pytorch_lightning/strategies/launchers/spawn.py b/src/pytorch_lightning/strategies/launchers/spawn.py index d94909b778a83..0a92ceee5aacf 100644 --- a/src/pytorch_lightning/strategies/launchers/spawn.py +++ b/src/pytorch_lightning/strategies/launchers/spawn.py @@ -26,7 +26,6 @@ from pytorch_lightning.strategies.strategy import Strategy from pytorch_lightning.trainer.states import TrainerFn, TrainerState from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device -from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.rank_zero import rank_zero_debug from pytorch_lightning.utilities.types import _PATH @@ -122,10 +121,6 @@ def _recover_results_in_main_process(self, spawn_output: "_SpawnOutput", trainer trainer.state = spawn_output.trainer_state # get the `callback_metrics` and set it to the trainer - if is_overridden("get_from_queue", trainer.lightning_module): - # only in case the user does not override it. 
- # TODO: Remove the if in v1.7 - trainer.lightning_module.get_from_queue(spawn_output.extra) self.get_from_queue(trainer, spawn_output.extra) def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Optional["_SpawnOutput"]: @@ -151,9 +146,6 @@ def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Opt # adds the `callback_metrics` to the queue extra = _FakeQueue() - if is_overridden("add_to_queue", trainer.lightning_module): - # TODO: Remove the if in v1.7 - trainer.lightning_module.add_to_queue(extra) self.add_to_queue(trainer, extra) return _SpawnOutput(best_model_path, weights_path, trainer.state, results, extra) diff --git a/src/pytorch_lightning/strategies/launchers/xla_spawn.py b/src/pytorch_lightning/strategies/launchers/xla_spawn.py index 13c948577ca5b..9c47e3b325cac 100644 --- a/src/pytorch_lightning/strategies/launchers/xla_spawn.py +++ b/src/pytorch_lightning/strategies/launchers/xla_spawn.py @@ -23,7 +23,6 @@ from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _TPU_AVAILABLE from pytorch_lightning.utilities.apply_func import move_data_to_device -from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.rank_zero import rank_zero_debug if _TPU_AVAILABLE: @@ -136,9 +135,6 @@ def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Opt # adds the `callback_metrics` to the queue extra = _FakeQueue() - if is_overridden("add_to_queue", trainer.lightning_module): - # TODO: Remove the if in v1.7 - trainer.lightning_module.add_to_queue(extra) self.add_to_queue(trainer, extra) return _SpawnOutput(best_model_path, weights_path, trainer.state, results, extra) diff --git a/src/pytorch_lightning/trainer/configuration_validator.py b/src/pytorch_lightning/trainer/configuration_validator.py index ceeec9f7fcbcd..c53e22ea74a76 100644 --- a/src/pytorch_lightning/trainer/configuration_validator.py +++ 
b/src/pytorch_lightning/trainer/configuration_validator.py @@ -46,7 +46,6 @@ def verify_loop_configurations(trainer: "pl.Trainer") -> None: __verify_eval_loop_configuration(trainer, model, "predict") __verify_dp_batch_transfer_support(trainer, model) - _check_add_get_queue(model) # TODO: Delete _check_on_post_move_to_device in v1.7 _check_on_post_move_to_device(model) _check_deprecated_callback_hooks(trainer) @@ -218,23 +217,6 @@ def __check_training_step_requires_dataloader_iter(model: "pl.LightningModule") ) -def _check_add_get_queue(model: "pl.LightningModule") -> None: - r""" - Checks if add_to_queue or get_from_queue is overridden and sends a deprecation warning. - - Args: - model: The lightning module - """ - if is_overridden("add_to_queue", model): - rank_zero_deprecation( - "The `LightningModule.add_to_queue` method was deprecated in v1.5 and will be removed in v1.7." - ) - if is_overridden("get_from_queue", model): - rank_zero_deprecation( - "The `LightningModule.get_from_queue` method was deprecated in v1.5 and will be removed in v1.7." - ) - - # TODO: Delete _check_on_hpc_hooks in v1.8 def _check_on_hpc_hooks(model: "pl.LightningModule") -> None: if is_overridden("on_hpc_save", model): diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index acde9224d501e..36c067a438c59 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -86,7 +86,7 @@ from pytorch_lightning.trainer.states import RunningStage, TrainerFn, TrainerState, TrainerStatus from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.tuner.lr_finder import _LRFinder -from pytorch_lightning.tuner.tuning import Tuner +from pytorch_lightning.tuner.tuning import _TunerResult, Tuner from pytorch_lightning.utilities import ( _HPU_AVAILABLE, _IPU_AVAILABLE, @@ -306,7 +306,7 @@ def __init__( Default: ``50``. enable_progress_bar: Whether to enable to progress bar by default. 
- Default: ``False``. + Default: ``True``. profiler: To profile individual steps during training and assist in identifying bottlenecks. Default: ``None``. @@ -1015,7 +1015,7 @@ def tune( datamodule: Optional[LightningDataModule] = None, scale_batch_size_kwargs: Optional[Dict[str, Any]] = None, lr_find_kwargs: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Optional[Union[int, _LRFinder]]]: + ) -> _TunerResult: r""" Runs routines to tune hyperparameters before training. diff --git a/src/pytorch_lightning/tuner/tuning.py b/src/pytorch_lightning/tuner/tuning.py index b1a38bd27688c..d5ec58c78c502 100644 --- a/src/pytorch_lightning/tuner/tuning.py +++ b/src/pytorch_lightning/tuner/tuning.py @@ -13,6 +13,8 @@ # limitations under the License. from typing import Any, Dict, Optional, Union +from typing_extensions import NotRequired, TypedDict + import pytorch_lightning as pl from pytorch_lightning.trainer.states import TrainerStatus from pytorch_lightning.tuner.batch_size_scaling import scale_batch_size @@ -21,6 +23,10 @@ from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS +class _TunerResult(TypedDict): + lr_find: NotRequired[Optional[_LRFinder]] + scale_batch_size: NotRequired[Optional[int]] + class Tuner: """Tuner class to tune your model.""" @@ -36,11 +42,11 @@ def _tune( model: "pl.LightningModule", scale_batch_size_kwargs: Optional[Dict[str, Any]] = None, lr_find_kwargs: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Optional[Union[int, _LRFinder]]]: + ) -> _TunerResult: scale_batch_size_kwargs = scale_batch_size_kwargs or {} lr_find_kwargs = lr_find_kwargs or {} # return a dict instead of a tuple so BC is not broken if a new tuning procedure is added - result = {} + result: _TunerResult = {} self.trainer.strategy.connect(model) @@ -84,7 +90,7 @@ def scale_batch_size( init_val: int = 2, max_trials: int = 25, batch_arg_name: str = 
"batch_size", - ) -> Optional[int]: + ) -> Optional[Union[int, _LRFinder]]: """Iteratively try to find the largest batch size for a given model that does not give an out of memory (OOM) error. @@ -151,7 +157,7 @@ def lr_find( mode: str = "exponential", early_stop_threshold: float = 4.0, update_attr: bool = False, - ) -> Optional[_LRFinder]: + ) -> Optional[Union[int, _LRFinder]]: """Enables the user to do a range test of good initial learning rates, to reduce the amount of guesswork in picking a good starting learning rate. diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-7.py b/tests/tests_pytorch/deprecated_api/test_remove_1-7.py index 17cccbfa80a5e..4187757fa3980 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-7.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-7.py @@ -15,14 +15,12 @@ import os from re import escape from unittest import mock -from unittest.mock import Mock import pytest import torch from pytorch_lightning import Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper from pytorch_lightning.plugins.environments import ( KubeflowEnvironment, LightningEnvironment, @@ -34,25 +32,6 @@ from tests_pytorch.plugins.environments.test_lsf_environment import _make_rankfile -class BoringCallbackDDPSpawnModel(BoringModel): - def add_to_queue(self, queue): - ... - - def get_from_queue(self, queue): - ... 
- - -def test_v1_7_0_deprecate_add_get_queue(tmpdir): - model = BoringCallbackDDPSpawnModel() - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) - - with pytest.deprecated_call(match=r"`LightningModule.add_to_queue` method was deprecated in v1.5"): - trainer.fit(model) - - with pytest.deprecated_call(match=r"`LightningModule.get_from_queue` method was deprecated in v1.5"): - trainer.fit(model) - - def test_v1_7_0_deprecate_lightning_distributed(tmpdir): with pytest.deprecated_call(match="LightningDistributed is deprecated in v1.5 and will be removed in v1.7."): from pytorch_lightning.distributed.dist import LightningDistributed @@ -167,15 +146,6 @@ def is_using_torchelastic(): MyClusterEnvironment() -def test_v1_7_0_index_batch_sampler_wrapper_batch_indices(): - sampler = IndexBatchSamplerWrapper(Mock()) - with pytest.deprecated_call(match="was deprecated in v1.5 and will be removed in v1.7"): - _ = sampler.batch_indices - - with pytest.deprecated_call(match="was deprecated in v1.5 and will be removed in v1.7"): - sampler.batch_indices = [] - - def test_v1_7_0_post_dispatch_hook(): class CustomPlugin(SingleDeviceStrategy): def post_dispatch(self, trainer): diff --git a/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py b/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py index 9a072368b0136..5af3df4613a2c 100644 --- a/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py +++ b/tests/tests_pytorch/strategies/test_ddp_spawn_strategy.py @@ -44,14 +44,6 @@ def validation_step(self, batch, batch_idx): self.log(self.name, self.val) return super().validation_step(batch, batch_idx) - def add_to_queue(self, queue) -> None: - queue.put("test_val") - return super().add_to_queue(queue) - - def get_from_queue(self, queue) -> None: - self.test_val = queue.get() - return super().get_from_queue(queue) - @RunIf(skip_windows=True) def test_ddp_cpu(): @@ -67,31 +59,13 @@ def test_ddp_cpu(): trainer.fit(model) -@RunIf(min_cuda_gpus=2) -def 
test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctly when training for DDPSpawnStrategy and tests add_to_queue/get_from_queue - with Lightning Module (deprecated way).""" - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator="gpu", devices=2, strategy="ddp_spawn") - - assert isinstance(trainer.strategy, DDPSpawnStrategy) - assert trainer.strategy.root_device == torch.device("cuda:0") - - val: float = 1.0 - val_name: str = "val_acc" - model = BoringCallbackDDPSpawnModel(val_name, val) - dm = BoringDataModule() - trainer.fit(model, datamodule=dm) - assert trainer.callback_metrics[val_name] == torch.tensor(val) - assert model.test_val == "test_val" - - class CustomSpawnLauncher(_SpawnLauncher): def add_to_queue(self, trainer, queue) -> None: - queue.put("new_test_val") + queue.put("test_val") return super().add_to_queue(trainer, queue) def get_from_queue(self, trainer: Trainer, queue) -> None: - trainer.strategy.new_test_val = queue.get() + trainer.strategy.test_val = queue.get() return super().get_from_queue(trainer, queue) @@ -115,7 +89,7 @@ def test_ddp_spawn_add_get_queue(tmpdir): dm = BoringDataModule() trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) - assert ddp_spawn_strategy.new_test_val == "new_test_val" + assert ddp_spawn_strategy.test_val == "test_val" class BoringModelDDP(BoringModel): diff --git a/tests/tests_pytorch/trainer/test_dataloaders.py b/tests/tests_pytorch/trainer/test_dataloaders.py index 6b01150c80857..5bea5a4cbbe1c 100644 --- a/tests/tests_pytorch/trainer/test_dataloaders.py +++ b/tests/tests_pytorch/trainer/test_dataloaders.py @@ -19,7 +19,7 @@ import torch from torch.utils.data import RandomSampler from torch.utils.data.dataloader import DataLoader -from torch.utils.data.dataset import Dataset, IterableDataset, Subset +from torch.utils.data.dataset import Dataset, IterableDataset from torch.utils.data.distributed import DistributedSampler from 
torch.utils.data.sampler import SequentialSampler @@ -831,53 +831,6 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir): assert trainer.state.finished, "DDP Training failed" -@RunIf(min_cuda_gpus=3) -def test_batch_size_smaller_than_num_gpus(tmpdir): - # we need at least 3 gpus for this test - num_gpus = 3 - batch_size = 3 - - class CurrentTestModel(BoringModel): - def __init__(self, batch_size) -> None: - super().__init__() - self.save_hyperparameters() - # batch norm doesn't work with batch size 1, we replace it - self.c_d1_bn = torch.nn.ReLU() - - def training_step(self, *args, **kwargs): - output = super().training_step(*args, **kwargs) - loss = output["loss"] - # we make sure to add some metrics to the output dict, - # this is essential for this test - output["progress_bar"] = {"train_loss": loss} - return output - - def train_dataloader(self): - dataset = RandomDataset(32, 64) - # construct a dataset with a size that is not divisible by num_gpus - # therefore the last batch will have a size < num_gpus - size = num_gpus * self.hparams.batch_size + (num_gpus - 1) - dataset = Subset(dataset, range(size)) - dataloader = DataLoader(dataset, batch_size=self.hparams.batch_size, drop_last=False) - return dataloader - - model = CurrentTestModel(batch_size=batch_size) - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=0.1, - limit_val_batches=0, - accelerator="gpu", - devices=num_gpus, - ) - - # we expect the reduction for the metrics also to happen on the last batch - # where we will get fewer metrics than gpus - trainer.fit(model) - assert trainer.state.finished, f"Training failed with {trainer.state}" - - @pytest.mark.parametrize( ["multiple_trainloader_mode", "num_training_batches"], [("min_size", 16), ("max_size_cycle", 64)],