2 changes: 1 addition & 1 deletion README.md
@@ -1,7 +1,7 @@
# amd_webui
## System Requirements
- AMD GPU with at least 8GB VRAM
- One of: Python 3.7, 3.8, 3.9, or 3.10 | Download from https://www.python.org/
- One of: Python 3.8, 3.9, 3.10, or 3.11 | Download from https://www.python.org/
- Git | Download from https://git-scm.com/downloads

## How To Install
24 changes: 16 additions & 8 deletions amd_webui.py
@@ -2,6 +2,7 @@
from diffusers import OnnxStableDiffusionPipeline, OnnxStableDiffusionImg2ImgPipeline
from huggingface_hub import _login
from huggingface_hub.hf_api import HfApi, HfFolder
from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
import subprocess
import sys
import pathlib
@@ -10,6 +11,11 @@
import random
import datetime
from PIL import Image
import onnxruntime
import pprint
pprint.pprint(onnxruntime.get_available_providers())


#from modules import txt2img
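
The `onnxruntime.get_available_providers()` printout added here is a startup sanity check for GPU execution. A minimal sketch of the same idea, assuming the DirectML build of ONNX Runtime (`onnxruntime-directml`) is what ends up installed, could fail fast instead of only printing:

```python
import onnxruntime

# Execution providers ONNX Runtime can actually load on this machine.
providers = onnxruntime.get_available_providers()
print(providers)

# With onnxruntime-directml installed on an AMD GPU, DirectML should be listed;
# without it, the Stable Diffusion pipeline would silently run on the CPU.
if "DmlExecutionProvider" not in providers:
    raise RuntimeError("DmlExecutionProvider not available - install onnxruntime-directml.")
```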


@@ -30,9 +36,10 @@
#scale = 7.5
#pipe = None
##need to set up UI for downloading weights
lms = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", steps_offset=1)


def txt2img(prompt, negative_prompt, steps, height, width, scale, denoise_strength=0, seed=None, scheduler=None, num_image=None):
def txt2img(prompt, negative_prompt, steps, height, width, scale, denoise_strength=0, seed=None, scheduler=lms, num_image=None):
try:
seed = int(seed)
if seed < 0:
@@ -42,8 +49,6 @@ def txt2img(prompt, negative_prompt, steps, height, width, scale, denoise_streng

generator = np.random.RandomState(seed)

#generator = torch.Generator(device='cpu')
#generator = generator.manual_seed(seed)
image = pipe(prompt,
negative_prompt = negative_prompt,
num_inference_steps=steps,
@@ -105,7 +110,7 @@ def huggingface_login(token):

def pip_install(lib):
subprocess.run(f'echo Installing {lib}...', shell=True)
if 'ort_nightly_directml' in lib:
if 'onnxruntime-directml' in lib:
subprocess.run(f'echo 1', shell=True)
subprocess.run(f'echo "{python}" -m pip install {lib}', shell=True)
subprocess.run(f'"{python}" -m pip install {lib} --force-reinstall', shell=True)
@@ -124,7 +129,9 @@ def is_installed(lib):

def download_sd_model(model_path):
pip_install('onnx')
from src.diffusers.scripts import convert_stable_diffusion_checkpoint_to_onnx
print('abc')
from conv import convert_models
print('ttt')
onnx_opset = 14
onnx_fp16 = False
try:
@@ -135,7 +142,7 @@ def download_sd_model(model_path):
if not onnx_dir.exists():
onnx_dir.mkdir(parents=True, exist_ok=True)
print(model_name)
convert_stable_diffusion_checkpoint_to_onnx.convert_models(model_path, str(onnx_model_dir), onnx_opset, onnx_fp16)
convert_models(model_path, str(onnx_model_dir), onnx_opset, onnx_fp16)
pip_uninstall('onnx')


@@ -156,8 +163,9 @@ def load_onnx_model(model):
## subprocess.run('echo installing onnx nightly built', shell=True)
global pipe
pipe = OnnxStableDiffusionPipeline.from_pretrained(str(onnx_dir/model),
safety_checker = None,
provider="DmlExecutionProvider")
safety_checker = None,
provider="DmlExecutionProvider",
)

return 'model ready'

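Taken together, the changes to `amd_webui.py` default `txt2img` to an LMS scheduler, detect the `onnxruntime-directml` package in `pip_install`, call `conv.convert_models` directly, and load the converted model through the DirectML execution provider. A rough standalone sketch of that load-and-generate path, not the app's exact code, with a placeholder model directory:

```python
from diffusers import OnnxStableDiffusionPipeline
from diffusers.schedulers import LMSDiscreteScheduler

# Same scheduler settings as the new module-level `lms` default in amd_webui.py.
lms = LMSDiscreteScheduler(
    beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", steps_offset=1
)

# "onnx/my_model" is a placeholder for a directory produced by conv.convert_models().
pipe = OnnxStableDiffusionPipeline.from_pretrained(
    "onnx/my_model",
    scheduler=lms,
    safety_checker=None,
    provider="DmlExecutionProvider",
)

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=25).images[0]
image.save("output.png")
```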
265 changes: 265 additions & 0 deletions conv.py
@@ -0,0 +1,265 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import shutil
from pathlib import Path

import onnx
import torch
from packaging import version
from torch.onnx import export

from diffusers import OnnxRuntimeModel, OnnxStableDiffusionPipeline, StableDiffusionPipeline


is_torch_less_than_1_11 = version.parse(version.parse(torch.__version__).base_version) < version.parse("1.11")


def onnx_export(
model,
model_args: tuple,
output_path: Path,
ordered_input_names,
output_names,
dynamic_axes,
opset,
use_external_data_format=False,
):
output_path.parent.mkdir(parents=True, exist_ok=True)
# PyTorch deprecated the `enable_onnx_checker` and `use_external_data_format` arguments in v1.11,
# so we check the torch version for backwards compatibility
if is_torch_less_than_1_11:
export(
model,
model_args,
f=output_path.as_posix(),
input_names=ordered_input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
do_constant_folding=True,
use_external_data_format=use_external_data_format,
enable_onnx_checker=True,
opset_version=opset,
)
else:
export(
model,
model_args,
f=output_path.as_posix(),
input_names=ordered_input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
do_constant_folding=True,
opset_version=opset,
)


@torch.no_grad()
def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = False):
dtype = torch.float16 if fp16 else torch.float32
if fp16 and torch.cuda.is_available():
device = "cuda"
elif fp16 and not torch.cuda.is_available():
raise ValueError("`float16` model export is only supported on GPUs with CUDA")
else:
device = "cpu"
pipeline = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=dtype).to(device)
output_path = Path(output_path)

# TEXT ENCODER
num_tokens = pipeline.text_encoder.config.max_position_embeddings
text_hidden_size = pipeline.text_encoder.config.hidden_size
text_input = pipeline.tokenizer(
"A sample prompt",
padding="max_length",
max_length=pipeline.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
onnx_export(
pipeline.text_encoder,
# casting to torch.int32 until the CLIP fix is released: https://github.com/huggingface/transformers/pull/18515/files
model_args=(text_input.input_ids.to(device=device, dtype=torch.int32)),
output_path=output_path / "text_encoder" / "model.onnx",
ordered_input_names=["input_ids"],
output_names=["last_hidden_state", "pooler_output"],
dynamic_axes={
"input_ids": {0: "batch", 1: "sequence"},
},
opset=opset,
)
del pipeline.text_encoder

# UNET
unet_in_channels = pipeline.unet.config.in_channels
unet_sample_size = pipeline.unet.config.sample_size
unet_path = output_path / "unet" / "model.onnx"
onnx_export(
pipeline.unet,
model_args=(
torch.randn(2, unet_in_channels, unet_sample_size, unet_sample_size).to(device=device, dtype=dtype),
torch.randn(2).to(device=device, dtype=dtype),
torch.randn(2, num_tokens, text_hidden_size).to(device=device, dtype=dtype),
False,
),
output_path=unet_path,
ordered_input_names=["sample", "timestep", "encoder_hidden_states", "return_dict"],
output_names=["out_sample"], # has to be different from "sample" for correct tracing
dynamic_axes={
"sample": {0: "batch", 1: "channels", 2: "height", 3: "width"},
"timestep": {0: "batch"},
"encoder_hidden_states": {0: "batch", 1: "sequence"},
},
opset=opset,
use_external_data_format=True, # UNet is > 2GB, so the weights need to be split
)
unet_model_path = str(unet_path.absolute().as_posix())
unet_dir = os.path.dirname(unet_model_path)
unet = onnx.load(unet_model_path)
# clean up existing tensor files
shutil.rmtree(unet_dir)
os.mkdir(unet_dir)
# collate external tensor files into one
onnx.save_model(
unet,
unet_model_path,
save_as_external_data=True,
all_tensors_to_one_file=True,
location="weights.pb",
convert_attribute=False,
)
del pipeline.unet

# VAE ENCODER
vae_encoder = pipeline.vae
vae_in_channels = vae_encoder.config.in_channels
vae_sample_size = vae_encoder.config.sample_size
# need to get the raw tensor output (sample) from the encoder
vae_encoder.forward = lambda sample, return_dict: vae_encoder.encode(sample, return_dict)[0].sample()
onnx_export(
vae_encoder,
model_args=(
torch.randn(1, vae_in_channels, vae_sample_size, vae_sample_size).to(device=device, dtype=dtype),
False,
),
output_path=output_path / "vae_encoder" / "model.onnx",
ordered_input_names=["sample", "return_dict"],
output_names=["latent_sample"],
dynamic_axes={
"sample": {0: "batch", 1: "channels", 2: "height", 3: "width"},
},
opset=opset,
)

# VAE DECODER
vae_decoder = pipeline.vae
vae_latent_channels = vae_decoder.config.latent_channels
vae_out_channels = vae_decoder.config.out_channels
# forward only through the decoder part
vae_decoder.forward = vae_encoder.decode
onnx_export(
vae_decoder,
model_args=(
torch.randn(1, vae_latent_channels, unet_sample_size, unet_sample_size).to(device=device, dtype=dtype),
False,
),
output_path=output_path / "vae_decoder" / "model.onnx",
ordered_input_names=["latent_sample", "return_dict"],
output_names=["sample"],
dynamic_axes={
"latent_sample": {0: "batch", 1: "channels", 2: "height", 3: "width"},
},
opset=opset,
)
del pipeline.vae

# SAFETY CHECKER
if pipeline.safety_checker is not None:
safety_checker = pipeline.safety_checker
clip_num_channels = safety_checker.config.vision_config.num_channels
clip_image_size = safety_checker.config.vision_config.image_size
safety_checker.forward = safety_checker.forward_onnx
onnx_export(
pipeline.safety_checker,
model_args=(
torch.randn(
1,
clip_num_channels,
clip_image_size,
clip_image_size,
).to(device=device, dtype=dtype),
torch.randn(1, vae_sample_size, vae_sample_size, vae_out_channels).to(device=device, dtype=dtype),
),
output_path=output_path / "safety_checker" / "model.onnx",
ordered_input_names=["clip_input", "images"],
output_names=["out_images", "has_nsfw_concepts"],
dynamic_axes={
"clip_input": {0: "batch", 1: "channels", 2: "height", 3: "width"},
"images": {0: "batch", 1: "height", 2: "width", 3: "channels"},
},
opset=opset,
)
del pipeline.safety_checker
safety_checker = OnnxRuntimeModel.from_pretrained(output_path / "safety_checker")
feature_extractor = pipeline.feature_extractor
else:
safety_checker = None
feature_extractor = None

onnx_pipeline = OnnxStableDiffusionPipeline(
vae_encoder=OnnxRuntimeModel.from_pretrained(output_path / "vae_encoder"),
vae_decoder=OnnxRuntimeModel.from_pretrained(output_path / "vae_decoder"),
text_encoder=OnnxRuntimeModel.from_pretrained(output_path / "text_encoder"),
tokenizer=pipeline.tokenizer,
unet=OnnxRuntimeModel.from_pretrained(output_path / "unet"),
scheduler=pipeline.scheduler,
safety_checker=safety_checker,
feature_extractor=feature_extractor,
requires_safety_checker=safety_checker is not None,
)

onnx_pipeline.save_pretrained(output_path)
print("ONNX pipeline saved to", output_path)

del pipeline
del onnx_pipeline
_ = OnnxStableDiffusionPipeline.from_pretrained(output_path, provider="CPUExecutionProvider")
print("ONNX pipeline is loadable")


if __name__ == "__main__":
parser = argparse.ArgumentParser()

parser.add_argument(
"--model_path",
type=str,
required=True,
help="Path to the `diffusers` checkpoint to convert (either a local directory or on the Hub).",
)

parser.add_argument("--output_path", type=str, required=True, help="Path to the output model.")

parser.add_argument(
"--opset",
default=14,
type=int,
help="The version of the ONNX operator set to use.",
)
parser.add_argument("--fp16", action="store_true", default=False, help="Export the models in `float16` mode")

args = parser.parse_args()

convert_models(args.model_path, args.output_path, args.opset, args.fp16)
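
`conv.py` is essentially a copy of the Hugging Face Diffusers `convert_stable_diffusion_checkpoint_to_onnx.py` script, vendored so `download_sd_model()` no longer has to import it from a source checkout of diffusers. A minimal sketch of calling it the way `amd_webui.py` now does, with placeholder model and output paths:

```python
from conv import convert_models

# Convert a diffusers checkpoint (a local directory or a Hub id) to ONNX.
# opset=14 and fp16=False match the values download_sd_model() passes.
convert_models(
    model_path="runwayml/stable-diffusion-v1-5",   # placeholder Hub id
    output_path="onnx/stable-diffusion-v1-5",      # placeholder output directory
    opset=14,
    fp16=False,
)
```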
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
20 changes: 13 additions & 7 deletions requirements.txt
@@ -1,11 +1,17 @@
gradio
ftfy==6.1.1
gradio==3.12.0
-e git+https://github.com/huggingface/diffusers@daebee0963d2b39fb3fa9532ab271a91674c4070#egg=diffusers
huggingface-hub==0.11.1
numpy==1.23.5
onnxruntime==1.13.1
diffusers==0.21.4
numpy
onnxruntime==1.15.1
huggingface-hub==0.16.4
scipy==1.9.3
torch==1.13.0
transformers==4.25.1


Accelerate==0.15.0
nest-asyncio==1.5.6
discord.py
argparse
pytest-shutil
pathlib
onnx
packaging