huggingface · bglick13 · Jul 19, 2022 · Jul 19, 2022 · Jul 26, 2022 · Oct 3, 2022
diff --git a/.gitignore b/.gitignore
@@ -163,4 +163,5 @@ tags
 *.lock
 
 # DS_Store (MacOS)
-.DS_Store
+.DS_Store
+*.png
diff --git a/examples/progressive_distillation/colab.py b/examples/progressive_distillation/colab.py
@@ -0,0 +1,93 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class TrainingConfig:
+    """Hyperparameters and output settings for the butterfly diffusion example.
+
+    Every attribute is type-annotated so that ``@dataclass`` registers it as a
+    real field. Without annotations the decorator sees no fields at all:
+    ``TrainingConfig(num_epochs=5)`` raises ``TypeError`` and ``repr``/``==``
+    ignore the values. The defaults are unchanged, so ``TrainingConfig()`` and
+    class-level access such as ``TrainingConfig.image_size`` keep working.
+    """
+
+    image_size: int = 128  # the generated image resolution
+    train_batch_size: int = 16
+    eval_batch_size: int = 16  # how many images to sample during evaluation
+    num_epochs: int = 50
+    gradient_accumulation_steps: int = 1
+    learning_rate: float = 1e-4
+    lr_warmup_steps: int = 500
+    save_image_epochs: int = 10
+    save_model_epochs: int = 30
+    mixed_precision: str = "fp16"  # `no` for float32, `fp16` for automatic mixed precision
+    output_dir: str = "ddpm-butterflies-128"  # the model name locally and on the HF Hub
+
+    push_to_hub: bool = True  # whether to upload the saved model to the HF Hub
+    hub_private_repo: bool = False
+    overwrite_output_dir: bool = True  # overwrite the old model when re-running the notebook
+    seed: int = 0
+
+
+# Instantiate the training configuration with its default values.
+config = TrainingConfig()
+
+from datasets import load_dataset
+
+# `dataset_name` is not declared on TrainingConfig; it is attached to the
+# instance here and read back on the next line.
+config.dataset_name = "huggan/smithsonian_butterflies_subset"
+dataset = load_dataset(config.dataset_name, split="train")
+from torchvision import transforms
+
+# Per-image preprocessing: resize to a square of side `config.image_size`,
+# apply a random horizontal flip, convert to a tensor, then normalize with
+# mean 0.5 and std 0.5.
+preprocess = transforms.Compose(
+    [
+        transforms.Resize((config.image_size, config.image_size)),
+        transforms.RandomHorizontalFlip(),
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5]),
+    ]
+)
+
+
+def transform(examples):
+    """Preprocess a batch of examples.
+
+    Each entry of ``examples["image"]`` is converted to RGB and passed through
+    ``preprocess``; the results are returned as a list under the ``"images"``
+    key.
+    """
+    processed = []
+    for picture in examples["image"]:
+        rgb_picture = picture.convert("RGB")
+        processed.append(preprocess(rgb_picture))
+    return {"images": processed}
+
+
+# Register `transform` as the dataset's transform, so examples are returned as
+# preprocessed tensors under the "images" key.
+dataset.set_transform(transform)
+import torch
+import os
+
+from diffusers import UNet2DModel, DistillationPipeline, DDPMPipeline, DDPMScheduler, DDIMPipeline, DDIMScheduler
+from accelerate import Accelerator
+
+
+# Pretrained UNet (the "unet" subfolder of the checkpoint) that serves as the
+# teacher passed to the distillation pipeline.
+teacher = UNet2DModel.from_pretrained("bglick13/ddim-butterflies-128-v-diffusion", subfolder="unet")
+
+# NOTE(review): the Accelerator setup below is disabled; within this script
+# `os` and `Accelerator` are referenced only by this commented-out code.
+# accelerator = Accelerator(
+#     mixed_precision=config.mixed_precision,
+#     gradient_accumulation_steps=config.gradient_accumulation_steps,
+#     log_with="tensorboard",
+#     logging_dir=os.path.join(config.output_dir, "logs"),
+# )
+# teacher = accelerator.prepare(teacher)
+# Run one round of distillation from the teacher and sample from the result.
+distiller = DistillationPipeline()
+n_teacher_trainsteps = 1000
+# One distillation round halves the number of timesteps, so the student's
+# scheduler length is derived from the teacher's instead of repeating a
+# hard-coded 500 that would silently go stale if the value above changes.
+n_student_trainsteps = n_teacher_trainsteps // 2
+# NOTE(review): the three return values are presumably the trained student, its
+# EMA wrapper and the accelerator used for training — confirm against
+# DistillationPipeline.__call__.
+new_teacher, distilled_ema, distill_accelrator = distiller(
+    teacher,
+    n_teacher_trainsteps,
+    dataset,
+    epochs=100,
+    batch_size=32,
+    mixed_precision="fp16",
+    sample_every=1,
+    gamma=0.0,
+    lr=1e-4,
+)
+new_scheduler = DDIMScheduler(
+    num_train_timesteps=n_student_trainsteps,
+    beta_schedule="squaredcos_cap_v2",
+    variance_type="v_diffusion",
+    prediction_type="v",
+)
+# Sample with the EMA-averaged student weights, unwrapped from the accelerator.
+pipeline = DDIMPipeline(
+    unet=distill_accelrator.unwrap_model(distilled_ema.averaged_model),
+    scheduler=new_scheduler,
+)
+
+# run pipeline in inference (sample random noise and denoise)
+images = pipeline(batch_size=4, output_type="numpy", generator=torch.manual_seed(0)).images
+
+# scale the images to uint8 and save the first one as a PNG
+images_processed = (images * 255).round().astype("uint8")
+from PIL import Image
+
+img = Image.fromarray(images_processed[0])
+img.save("denoised.png")
diff --git a/examples/progressive_distillation/image_diffusion.ipynb b/examples/progressive_distillation/image_diffusion.ipynb
@@ -0,0 +1,273 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)\n",
+      "WARNING:torch.distributed.elastic.multiprocessing.redirects:NOTE: Redirects are currently not supported in Windows or MacOs.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "from PIL import Image\n",
+    "from diffusers import AutoencoderKL, UNet2DModel, DDIMPipeline, DDIMScheduler, DDPMPipeline, DDPMScheduler, DistillationPipeline\n",
+    "from diffusers.optimization import get_scheduler\n",
+    "from diffusers.training_utils import EMAModel\n",
+    "import math\n",
+    "import requests\n",
+    "from torchvision.transforms import (\n",
+    "    CenterCrop,\n",
+    "    Compose,\n",
+    "    InterpolationMode,\n",
+    "    Normalize,\n",
+    "    RandomHorizontalFlip,\n",
+    "    Resize,\n",
+    "    ToTensor,\n",
+    "    ToPILImage\n",
+    ")\n",
+    "from torch.utils.data import Dataset\n",
+    "from accelerate import Accelerator\n",
+    "import utils\n",
+    "from tqdm import tqdm\n",
+    "import torch.nn.functional as F\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<torch._C.Generator at 0x7f9a051d2010>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.manual_seed(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_config = utils.DiffusionTrainingArgs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load an image of my dog for this example\n",
+    "\n",
+    "image_url = \"https://i.imgur.com/IJcs4Aa.jpeg\"\n",
+    "image = Image.open(requests.get(image_url, stream=True).raw)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the transforms to apply to the image for training\n",
+    "augmentations = utils.get_train_transforms(training_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SingleImageDataset(Dataset):\n",
+    "    def __init__(self, image, batch_size):\n",
+    "        self.image = image\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.batch_size\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        return self.image\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_image = augmentations(image.convert(\"RGB\"))\n",
+    "train_dataset = SingleImageDataset(train_image, training_config.batch_size)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2b23b591496741a299b75e4e9448b29a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/455M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1df9166b338f49adbaac183384972ea0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "teacher = UNet2DModel.from_pretrained(\"bglick13/minnie-diffusion\")\n",
+    "distiller = DistillationPipeline()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "N = 1000\n",
+    "generator = torch.manual_seed(0)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "teacher = UNet2DModel.from_pretrained(\"bglick13/minnie-diffusion\")\n",
+    "N = 1000\n",
+    "distilled_images = []\n",
+    "for distill_step in range(2):\n",
+    "    print(f\"Distill step {distill_step} from {N} -> {N // 2}\")\n",
+    "    teacher, distilled_ema, distill_accelrator = distiller(teacher, N, train_dataset, epochs=300, batch_size=training_config.batch_size)\n",
+    "    N = N // 2\n",
+    "    new_scheduler = DDPMScheduler(num_train_timesteps=N, beta_schedule=\"squaredcos_cap_v2\")\n",
+    "    pipeline = DDPMPipeline(\n",
+    "        unet=distill_accelrator.unwrap_model(distilled_ema.averaged_model if training_config.use_ema else teacher),\n",
+    "        scheduler=new_scheduler,\n",
+    "    )\n",
+    "\n",
+    "    # run pipeline in inference (sample random noise and denoise)\n",
+    "    images = pipeline(generator=generator, batch_size=training_config.batch_size, output_type=\"numpy\").images\n",
+    "\n",
+    "    # scale the images to uint8 and keep the first one from this distillation step\n",
+    "    images_processed = (images * 255).round().astype(\"uint8\")\n",
+    "    distilled_images.append(images_processed[0])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Display train image for reference\n",
+    "train_image_display = train_image * 0.5 + 0.5\n",
+    "train_image_display = ToPILImage()(train_image_display)\n",
+    "display(train_image_display)\n",
+    "\n",
+    "for i, image in enumerate(distilled_images):\n",
+    "    print(f\"Distilled image {i}\")\n",
+    "    display(Image.fromarray(image))\n",
+    "    Image.fromarray(image).save(f\"distilled_{i}.png\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display(Image.fromarray(images_processed[0]))\n",
+    "display(Image.fromarray(images_processed[1]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.6 ('diffusers')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "77f6871a522595648ebba7232d315a2f946cc4cd5f56470cb61e517ec9b94e2e"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}