Introduce the copy mechanism (huggingface#924)

anton-l · Prathik Rao · commit 21d570d3212e · 2022-10-26T15:10:09.000-07:00
* Introduce the copy mechanism

* init tests

* fix dummy tests

* with

* update copies tests
diff --git a/.github/workflows/pr_quality.yml b/.github/workflows/pr_quality.yml
@@ -31,3 +31,20 @@ jobs:
           isort --check-only examples tests src utils scripts
           flake8 examples tests src utils scripts
           doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
+
+  check_repository_consistency:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[quality]
+      - name: Check quality
+        run: |
+          python utils/check_copies.py
+          python utils/check_dummies.py
diff --git a/Makefile b/Makefile
@@ -67,6 +67,7 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
 # Make marked copies of snippets of codes conform to the original
 
 fix-copies:
+	python utils/check_copies.py --fix_and_overwrite
 	python utils/check_dummies.py --fix_and_overwrite
 
 # Run tests for the library
diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py
@@ -28,6 +28,7 @@
 
 
 @dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
 class DDIMSchedulerOutput(BaseOutput):
     """
     Output class for the scheduler's step function output.
diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py
@@ -26,6 +26,7 @@
 
 
 @dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->LMSDiscrete
 class LMSDiscreteSchedulerOutput(BaseOutput):
     """
     Output class for the scheduler's step function output.
diff --git a/tests/repo_utils/test_check_copies.py b/tests/repo_utils/test_check_copies.py
@@ -0,0 +1,120 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import shutil
+import sys
+import tempfile
+import unittest
+
+import black
+
+
+git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+sys.path.append(os.path.join(git_repo_path, "utils"))
+
+import check_copies  # noqa: E402
+
+
+# This is the reference code that will be used in the tests.
+# If DDPMSchedulerOutput is changed in scheduling_ddpm.py, this code needs to be manually updated.
+REFERENCE_CODE = """    \"""
+    Output class for the scheduler's step function output.
+
+    Args:
+        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+            The predicted denoised sample (x_{0}) based on the model output from the current timestep.
+            `pred_original_sample` can be used to preview progress or for guidance.
+    \"""
+
+    prev_sample: torch.FloatTensor
+    pred_original_sample: Optional[torch.FloatTensor] = None
+"""
+
+
+class CopyCheckTester(unittest.TestCase):
+    def setUp(self):
+        self.diffusers_dir = tempfile.mkdtemp()
+        os.makedirs(os.path.join(self.diffusers_dir, "schedulers/"))
+        check_copies.DIFFUSERS_PATH = self.diffusers_dir
+        shutil.copy(
+            os.path.join(git_repo_path, "src/diffusers/schedulers/scheduling_ddpm.py"),
+            os.path.join(self.diffusers_dir, "schedulers/scheduling_ddpm.py"),
+        )
+
+    def tearDown(self):
+        check_copies.DIFFUSERS_PATH = "src/diffusers"
+        shutil.rmtree(self.diffusers_dir)
+
+    def check_copy_consistency(self, comment, class_name, class_code, overwrite_result=None):
+        code = comment + f"\nclass {class_name}(nn.Module):\n" + class_code
+        if overwrite_result is not None:
+            expected = comment + f"\nclass {class_name}(nn.Module):\n" + overwrite_result
+        mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119)
+        code = black.format_str(code, mode=mode)
+        fname = os.path.join(self.diffusers_dir, "new_code.py")
+        with open(fname, "w", newline="\n") as f:
+            f.write(code)
+        if overwrite_result is None:
+            self.assertTrue(len(check_copies.is_copy_consistent(fname)) == 0)
+        else:
+            check_copies.is_copy_consistent(f.name, overwrite=True)
+            with open(fname, "r") as f:
+                self.assertTrue(f.read(), expected)
+
+    def test_find_code_in_diffusers(self):
+        code = check_copies.find_code_in_diffusers("schedulers.scheduling_ddpm.DDPMSchedulerOutput")
+        self.assertEqual(code, REFERENCE_CODE)
+
+    def test_is_copy_consistent(self):
+        # Base copy consistency
+        self.check_copy_consistency(
+            "# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput",
+            "DDPMSchedulerOutput",
+            REFERENCE_CODE + "\n",
+        )
+
+        # With no empty line at the end
+        self.check_copy_consistency(
+            "# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput",
+            "DDPMSchedulerOutput",
+            REFERENCE_CODE,
+        )
+
+        # Copy consistency with rename
+        self.check_copy_consistency(
+            "# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->Test",
+            "TestSchedulerOutput",
+            re.sub("DDPM", "Test", REFERENCE_CODE),
+        )
+
+        # Copy consistency with a really long name
+        long_class_name = "TestClassWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReason"
+        self.check_copy_consistency(
+            f"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->{long_class_name}",
+            f"{long_class_name}SchedulerOutput",
+            re.sub("Bert", long_class_name, REFERENCE_CODE),
+        )
+
+        # Copy consistency with overwrite
+        self.check_copy_consistency(
+            "# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->Test",
+            "TestSchedulerOutput",
+            REFERENCE_CODE,
+            overwrite_result=re.sub("DDPM", "Test", REFERENCE_CODE),
+        )
diff --git a/tests/repo_utils/test_check_dummies.py b/tests/repo_utils/test_check_dummies.py
@@ -0,0 +1,124 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+
+
+git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+sys.path.append(os.path.join(git_repo_path, "utils"))
+
+import check_dummies
+from check_dummies import create_dummy_files, create_dummy_object, find_backend, read_init  # noqa: E402
+
+
+# Align TRANSFORMERS_PATH in check_dummies with the current path
+check_dummies.PATH_TO_DIFFUSERS = os.path.join(git_repo_path, "src", "diffusers")
+
+
+class CheckDummiesTester(unittest.TestCase):
+    def test_find_backend(self):
+        simple_backend = find_backend("    if not is_torch_available():")
+        self.assertEqual(simple_backend, "torch")
+
+        # backend_with_underscore = find_backend("    if not is_tensorflow_text_available():")
+        # self.assertEqual(backend_with_underscore, "tensorflow_text")
+
+        double_backend = find_backend("    if not (is_torch_available() and is_transformers_available()):")
+        self.assertEqual(double_backend, "torch_and_transformers")
+
+        # double_backend_with_underscore = find_backend(
+        #    "    if not (is_sentencepiece_available() and is_tensorflow_text_available()):"
+        # )
+        # self.assertEqual(double_backend_with_underscore, "sentencepiece_and_tensorflow_text")
+
+        triple_backend = find_backend(
+            "    if not (is_torch_available() and is_transformers_available() and is_onnx_available()):"
+        )
+        self.assertEqual(triple_backend, "torch_and_transformers_and_onnx")
+
+    def test_read_init(self):
+        objects = read_init()
+        # We don't assert on the exact list of keys to allow for smooth grow of backend-specific objects
+        self.assertIn("torch", objects)
+        self.assertIn("torch_and_transformers", objects)
+        self.assertIn("flax_and_transformers", objects)
+        self.assertIn("torch_and_transformers_and_onnx", objects)
+
+        # Likewise, we can't assert on the exact content of a key
+        self.assertIn("UNet2DModel", objects["torch"])
+        self.assertIn("FlaxUNet2DConditionModel", objects["flax"])
+        self.assertIn("StableDiffusionPipeline", objects["torch_and_transformers"])
+        self.assertIn("FlaxStableDiffusionPipeline", objects["flax_and_transformers"])
+        self.assertIn("LMSDiscreteScheduler", objects["torch_and_scipy"])
+        self.assertIn("OnnxStableDiffusionPipeline", objects["torch_and_transformers_and_onnx"])
+
+    def test_create_dummy_object(self):
+        dummy_constant = create_dummy_object("CONSTANT", "'torch'")
+        self.assertEqual(dummy_constant, "\nCONSTANT = None\n")
+
+        dummy_function = create_dummy_object("function", "'torch'")
+        self.assertEqual(
+            dummy_function, "\ndef function(*args, **kwargs):\n    requires_backends(function, 'torch')\n"
+        )
+
+        expected_dummy_class = """
+class FakeClass(metaclass=DummyObject):
+    _backends = 'torch'
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, 'torch')
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, 'torch')
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, 'torch')
+"""
+        dummy_class = create_dummy_object("FakeClass", "'torch'")
+        self.assertEqual(dummy_class, expected_dummy_class)
+
+    def test_create_dummy_files(self):
+        expected_dummy_pytorch_file = """# This file is autogenerated by the command `make fix-copies`, do not edit.
+# flake8: noqa
+
+from ..utils import DummyObject, requires_backends
+
+
+CONSTANT = None
+
+
+def function(*args, **kwargs):
+    requires_backends(function, ["torch"])
+
+
+class FakeClass(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+"""
+        dummy_files = create_dummy_files({"torch": ["CONSTANT", "function", "FakeClass"]})
+        self.assertEqual(dummy_files["torch"], expected_dummy_pytorch_file)
diff --git a/utils/check_copies.py b/utils/check_copies.py
diff --git a/utils/check_dummies.py b/utils/check_dummies.py