From 37885d5d964e3ac3f3da9c9e2a2c67160d06d1d0 Mon Sep 17 00:00:00 2001 From: anton-l Date: Tue, 20 Sep 2022 16:37:25 +0200 Subject: [PATCH 1/7] Add smoke tests for the training examples --- .github/workflows/push_tests.yml | 53 ++++++++++++- examples/conftest.py | 45 +++++++++++ examples/test_examples.py | 123 +++++++++++++++++++++++++++++++ 3 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 examples/conftest.py create mode 100644 examples/test_examples.py diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index d1317ac92db2..523b91810963 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -59,5 +59,56 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v2 with: - name: push_torch_test_reports + name: torch_test_reports + path: reports + + + + run_examples_single_gpu: + name: Diffusers tests + strategy: + fail-fast: false + matrix: + machine_type: [ single-gpu ] + runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] + container: + image: nvcr.io/nvidia/pytorch:22.07-py3 + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip uninstall -y torch torchvision torchtext + python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 + python -m pip install -e .[quality,test,training] + + - name: Environment + run: | + python utils/print_env.py + + - name: Run example tests on GPU + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_gpu examples/ + + - name: Failure short reports + if: ${{ failure() }} + run: cat 
reports/examples_torch_gpu_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: examples_test_reports path: reports diff --git a/examples/conftest.py b/examples/conftest.py new file mode 100644 index 000000000000..188ff535ecb6 --- /dev/null +++ b/examples/conftest.py @@ -0,0 +1,45 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# tests directory-specific settings - this file is run automatically +# by pytest before any tests are run + +import sys +import warnings +from os.path import abspath, dirname, join + + +# allow having multiple repository checkouts and not needing to remember to rerun +# 'pip install -e .[dev]' when switching between checkouts and running tests. +git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) +sys.path.insert(1, git_repo_path) + + +# silence FutureWarning warnings in tests since often we can't act on them until +# they become normal warnings - i.e. 
the tests still need to test the current functionality +warnings.simplefilter(action="ignore", category=FutureWarning) + + +def pytest_addoption(parser): + from diffusers.testing_utils import pytest_addoption_shared + + pytest_addoption_shared(parser) + + +def pytest_terminal_summary(terminalreporter): + from diffusers.testing_utils import pytest_terminal_summary_main + + make_reports = terminalreporter.config.getoption("--make-reports") + if make_reports: + pytest_terminal_summary_main(terminalreporter, id=make_reports) diff --git a/examples/test_examples.py b/examples/test_examples.py new file mode 100644 index 000000000000..667b7c873d6b --- /dev/null +++ b/examples/test_examples.py @@ -0,0 +1,123 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc.. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import os +import shutil +import subprocess +import sys +import tempfile +import unittest +from typing import List + +from accelerate.utils import write_basic_config + + +logging.basicConfig(level=logging.DEBUG) + +logger = logging.getLogger() + + +# These utils relate to ensuring the right error message is received when running scripts +class SubprocessCallException(Exception): + pass + + +def run_command(command: List[str], return_stdout=False): + """ + Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. 
Will also properly capture + if an error occurred while running `command` + """ + try: + output = subprocess.check_output(command, stderr=subprocess.STDOUT) + if return_stdout: + if hasattr(output, "decode"): + output = output.decode("utf-8") + return output + except subprocess.CalledProcessError as e: + raise SubprocessCallException( + f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}" + ) from e + + +class ExamplesTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._tmpdir = tempfile.mkdtemp() + cls.configPath = os.path.join(cls._tmpdir, "default_config.yml") + + write_basic_config(save_location=cls.configPath) + cls._launch_args = ["accelerate", "launch", "--config_file", cls.configPath] + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + shutil.rmtree(cls._tmpdir) + + def test_train_unconditional(self): + stream_handler = logging.StreamHandler(sys.stdout) + logger.addHandler(stream_handler) + + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + examples/unconditional_image_generation/train_unconditional.py + --dataset_name huggan/flowers-102-categories + --resolution 64 + --output_dir {tmpdir} + --train_batch_size 4 + --num_epochs 2 + --gradient_accumulation_steps 2 + --learning_rate 1e-3 + --lr_warmup_steps 5 + --mixed_precision fp16 + """.split() + + run_command(self._launch_args + test_args, return_stdout=True) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "unet", "diffusion_pytorch_model.bin"))) + # model card generation smoke test + with open(os.path.join(tmpdir, "README.md"), "r") as f: + self.assertTrue("learning_rate: 0.001" in f.read()) + + def test_textual_inversion(self): + stream_handler = logging.StreamHandler(sys.stdout) + logger.addHandler(stream_handler) + + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + examples/textual_inversion/textual_inversion.py
--pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 + --use_auth_token + --train_data_dir docs/source/imgs + --learnable_property object + --placeholder_token <cat-toy> + --initializer_token toy + --resolution 64 + --train_batch_size 1 + --gradient_accumulation_steps 2 + --max_train_steps 10 + --learning_rate 5.0e-04 + --scale_lr + --lr_scheduler constant + --lr_warmup_steps 0 + --output_dir {tmpdir} + --mixed_precision fp16 + """.split() + + run_command(self._launch_args + test_args) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "learned_embeds.bin"))) From 91b275bb08a7315704b47a281c4d0109b3c47dc8 Mon Sep 17 00:00:00 2001 From: anton-l Date: Tue, 20 Sep 2022 16:40:56 +0200 Subject: [PATCH 2/7] upd --- examples/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/conftest.py b/examples/conftest.py index 188ff535ecb6..a72bc85310d2 100644 --- a/examples/conftest.py +++ b/examples/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2020 The HuggingFace Team. All rights reserved. +# Copyright 2022 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
From ccdc9fc7629d0934c6cb1bb7f68e86a08fea6d0d Mon Sep 17 00:00:00 2001 From: anton-l Date: Tue, 20 Sep 2022 16:43:38 +0200 Subject: [PATCH 3/7] use a dummy dataset --- examples/test_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/test_examples.py b/examples/test_examples.py index 667b7c873d6b..b7281a989ae7 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -75,7 +75,7 @@ def test_train_unconditional(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" examples/unconditional_image_generation/train_unconditional.py - --dataset_name huggan/flowers-102-categories + --dataset_name huggan/few-shot-aurora --resolution 64 --output_dir {tmpdir} --train_batch_size 4 From fe85e747d788f3855a283dff6c08a9d9b82ca1e9 Mon Sep 17 00:00:00 2001 From: anton-l Date: Tue, 20 Sep 2022 16:44:33 +0200 Subject: [PATCH 4/7] mark as slow --- examples/test_examples.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/test_examples.py b/examples/test_examples.py index b7281a989ae7..2016ecae07ab 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -24,6 +24,7 @@ from typing import List from accelerate.utils import write_basic_config +from diffusers.testing_utils import slow logging.basicConfig(level=logging.DEBUG) @@ -68,6 +69,7 @@ def tearDownClass(cls): super().tearDownClass() shutil.rmtree(cls._tmpdir) + @slow def test_train_unconditional(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -93,6 +95,7 @@ def test_train_unconditional(self): with open(os.path.join(tmpdir, "README.md"), "r") as f: self.assertTrue("learning_rate: 0.001" in f.read()) + @slow def test_textual_inversion(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) From 43020d9b5220545c01083fa5633ff44c95cfdf3a Mon Sep 17 00:00:00 2001 From: anton-l Date: Tue, 20 Sep 2022 16:46:54 +0200 Subject: [PATCH 5/7] cleanup --- 
examples/test_examples.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/test_examples.py b/examples/test_examples.py index 2016ecae07ab..41885d38fbac 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -54,7 +54,11 @@ def run_command(command: List[str], return_stdout=False): ) from e -class ExamplesTests(unittest.TestCase): +stream_handler = logging.StreamHandler(sys.stdout) +logger.addHandler(stream_handler) + + +class ExamplesTestsAccelerate(unittest.TestCase): @classmethod def setUpClass(cls): super().setUpClass() @@ -71,9 +75,6 @@ def tearDownClass(cls): @slow def test_train_unconditional(self): - stream_handler = logging.StreamHandler(sys.stdout) - logger.addHandler(stream_handler) - with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" examples/unconditional_image_generation/train_unconditional.py @@ -97,9 +98,6 @@ def test_train_unconditional(self): @slow def test_textual_inversion(self): - stream_handler = logging.StreamHandler(sys.stdout) - logger.addHandler(stream_handler) - with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" examples/textual_inversion/textual_inversion.py From caa28221a40af5afd07e57abf3cb175fd4119065 Mon Sep 17 00:00:00 2001 From: anton-l Date: Wed, 21 Sep 2022 13:36:00 +0200 Subject: [PATCH 6/7] Update test cases --- examples/test_examples.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/test_examples.py b/examples/test_examples.py index 41885d38fbac..0099d17e638d 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -82,8 +82,8 @@ def test_train_unconditional(self): --resolution 64 --output_dir {tmpdir} --train_batch_size 4 - --num_epochs 2 - --gradient_accumulation_steps 2 + --num_epochs 1 + --gradient_accumulation_steps 1 --learning_rate 1e-3 --lr_warmup_steps 5 --mixed_precision fp16 @@ -92,9 +92,9 @@ def test_train_unconditional(self): run_command(self._launch_args + test_args, 
return_stdout=True) # save_pretrained smoke test self.assertTrue(os.path.isfile(os.path.join(tmpdir, "unet", "diffusion_pytorch_model.bin"))) - # model card generation smoke test - with open(os.path.join(tmpdir, "README.md"), "r") as f: - self.assertTrue("learning_rate: 0.001" in f.read()) + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "scheduler", "scheduler_config.json"))) + # logging test + self.assertTrue(len(os.listdir(os.path.join(tmpdir, "logs", "train_unconditional"))) > 0) @slow def test_textual_inversion(self): From 11b678e2c1abad35eb6e34d6c90cb0bfe2d2c70a Mon Sep 17 00:00:00 2001 From: anton-l Date: Wed, 21 Sep 2022 13:37:35 +0200 Subject: [PATCH 7/7] naming --- .github/workflows/push_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 523b91810963..3db6814e071d 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -65,7 +65,7 @@ jobs: run_examples_single_gpu: - name: Diffusers tests + name: Examples tests strategy: fail-fast: false matrix: