From 6384f23dbbc9d690cb3fae2f461487fc2ab47479 Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Wed, 13 Dec 2023 11:40:18 +0100 Subject: [PATCH 1/8] support for marigold --- pix2pix/models/pix2pix4depth_model.py | 4 ++-- src/depthmap_generation.py | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pix2pix/models/pix2pix4depth_model.py b/pix2pix/models/pix2pix4depth_model.py index 89e8965..8fff0fa 100644 --- a/pix2pix/models/pix2pix4depth_model.py +++ b/pix2pix/models/pix2pix4depth_model.py @@ -94,8 +94,8 @@ def set_input_train(self, input): self.real_A = torch.cat((self.outer, self.inner), 1) def set_input(self, outer, inner): - inner = torch.from_numpy(inner).unsqueeze(0).unsqueeze(0) - outer = torch.from_numpy(outer).unsqueeze(0).unsqueeze(0) + inner = torch.from_numpy(inner).unsqueeze(0).unsqueeze(0).float() + outer = torch.from_numpy(outer).unsqueeze(0).unsqueeze(0).float() inner = (inner - torch.min(inner))/(torch.max(inner)-torch.min(inner)) outer = (outer - torch.min(outer))/(torch.max(outer)-torch.min(outer)) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index 93dd03c..8ce5152 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -197,8 +197,8 @@ def load_models(self, model_type, device: torch.device, boost: bool): model = build_model(conf) elif model_type == 10: # Marigold v1 - # TODO: pass more parameters - model_path = f"{model_dir}/marigold_v1/" + model_path = "Bingxin/Marigold" + print(model_path) from repositories.Marigold.src.model.marigold_pipeline import MarigoldPipeline model = MarigoldPipeline.from_pretrained(model_path) @@ -301,7 +301,7 @@ def get_raw_prediction(self, input, net_width, net_height): self.resize_mode, self.normalization, self.no_half, self.precision == "autocast") elif self.depth_model_type == 10: - raw_prediction = estimatemarigold(img, self.depth_model, net_width, net_height, self.device) + raw_prediction = estimatemarigold(img, self.depth_model, net_width, net_height) else: raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model, self.boost_whole_size_threshold) @@ -405,7 +405,7 @@ def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precisi return prediction -def estimatemarigold(image, model, w, h, device): +def estimatemarigold(image, model, w, h): from repositories.Marigold.src.model.marigold_pipeline import MarigoldPipeline from repositories.Marigold.src.util.ensemble import ensemble_depths from repositories.Marigold.src.util.image_util import chw2hwc, colorize_depth_maps, resize_max_res @@ -418,13 +418,19 @@ def estimatemarigold(image, model, w, h, device): tol = 1e-3 reduction_method = "median" merging_max_res = None + resize_to_max_res = None # From Marigold repository run.py with torch.no_grad(): + image = (image * 255).astype(np.uint8) + if resize_to_max_res is not None: + image = np.asarray(resize_max_res( + Image.fromarray(image), max_edge_resolution=resize_to_max_res + )) rgb = np.transpose(image, (2, 0, 1)) # [H, W, rgb] -> [rgb, H, W] rgb_norm = rgb / 255.0 rgb_norm = torch.from_numpy(rgb_norm).unsqueeze(0).float() - rgb_norm = rgb_norm.to(device) + rgb_norm = rgb_norm.to(depthmap_device) model.unet.eval() depth_pred_ls = [] @@ -445,7 +451,7 @@ def estimatemarigold(image, model, w, h, device): tol=tol, reduction=reduction_method, max_res=merging_max_res, - device=device, + device=depthmap_device, ) else: depth_pred = depth_preds @@ -942,6 +948,8 @@ def doubleestimate(img, size1, size2, pix2pixsize, model, net_type, pix2pixmodel def singleestimate(img, msize, model, net_type): if net_type == 0: return estimateleres(img, model, msize, msize) + elif net_type == 10: + return estimatemarigold(img, model, msize, msize) elif net_type >= 7: # np to PIL return estimatezoedepth(Image.fromarray(np.uint8(img * 255)).convert('RGB'), model, msize, msize) From dd2d907a6a1d8896da78e992c57fffd68a8ab5fe Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:25:27 +0100 Subject: [PATCH 2/8] Invert depth map --- src/depthmap_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index 8ce5152..e09b134 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -305,7 +305,7 @@ def get_raw_prediction(self, input, net_width, net_height): else: raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model, self.boost_whole_size_threshold) - raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9] + raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9, 10] return raw_prediction, raw_prediction_invert From b7ee6fd8402dee2906a2b5a49f3dbad5cbc22630 Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:26:48 +0100 Subject: [PATCH 3/8] move mult/div to resize_to_max_res to leverage resize_max_res fn --- src/depthmap_generation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index e09b134..d08091f 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -422,13 +422,12 @@ def estimatemarigold(image, model, w, h): # From Marigold repository run.py with torch.no_grad(): - image = (image * 255).astype(np.uint8) if resize_to_max_res is not None: + image = (image * 255).astype(np.uint8) image = np.asarray(resize_max_res( Image.fromarray(image), max_edge_resolution=resize_to_max_res - )) - rgb = np.transpose(image, (2, 0, 1)) # [H, W, rgb] -> [rgb, H, W] - rgb_norm = rgb / 255.0 + )) / 255.0 + rgb_norm = np.transpose(image, (2, 0, 1)) # [H, W, rgb] -> [rgb, H, W] rgb_norm = torch.from_numpy(rgb_norm).unsqueeze(0).float() rgb_norm = rgb_norm.to(depthmap_device) From 889ed866973a29f2bda329035dea984e49549666 Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:28:06 +0100 Subject: [PATCH 4/8] repositories.Marigold -> Marigold --- install.py | 2 +- src/depthmap_generation.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/install.py b/install.py index 9d74403..33dbeeb 100644 --- a/install.py +++ b/install.py @@ -53,4 +53,4 @@ def ensure(module_name, min_version=None): if platform.system() == 'Darwin': ensure('pyqt6') -launch.git_clone("https://github.com/prs-eth/Marigold", "repositories/Marigold", "Marigold", "cc78ff3") +launch.git_clone("https://github.com/prs-eth/Marigold", "Marigold", "Marigold", "cc78ff3") diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index d08091f..a4eec44 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -74,8 +74,6 @@ def load_models(self, model_type, device: torch.device, boost: bool): model_dir = "./models/midas" if model_type == 0: model_dir = "./models/leres" - if model_type == 10: - "./models/marigold" # create paths to model if not present os.makedirs(model_dir, exist_ok=True) os.makedirs('./models/pix2pix', exist_ok=True) @@ -199,7 +197,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): elif model_type == 10: # Marigold v1 model_path = "Bingxin/Marigold" print(model_path) - from repositories.Marigold.src.model.marigold_pipeline import MarigoldPipeline + from Marigold.src.model.marigold_pipeline import MarigoldPipeline model = MarigoldPipeline.from_pretrained(model_path) model.eval() # prepare for evaluation @@ -406,10 +404,10 @@ def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precisi def estimatemarigold(image, model, w, h): - from repositories.Marigold.src.model.marigold_pipeline import MarigoldPipeline - from repositories.Marigold.src.util.ensemble import ensemble_depths - from repositories.Marigold.src.util.image_util import chw2hwc, colorize_depth_maps, resize_max_res - from repositories.Marigold.src.util.seed_all import seed_all + from Marigold.src.model.marigold_pipeline import MarigoldPipeline + from Marigold.src.util.ensemble import ensemble_depths + from Marigold.src.util.image_util import chw2hwc, colorize_depth_maps, resize_max_res + from Marigold.src.util.seed_all import seed_all n_repeat = 10 denoise_steps = 10 From 9d11eed2c574ecebac5d9e7f821c088f9f5d9838 Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:28:29 +0100 Subject: [PATCH 5/8] bump version --- src/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc.py b/src/misc.py index 41d1d15..2c024a6 100644 --- a/src/misc.py +++ b/src/misc.py @@ -15,7 +15,7 @@ def get_commit_hash(): REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script" SCRIPT_NAME = "DepthMap" -SCRIPT_VERSION = "v0.4.4" +SCRIPT_VERSION = "v0.4.5" SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})" From 1585a508b1b2f16270e0cc3459ce5c001ddd05cb Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:29:02 +0100 Subject: [PATCH 6/8] Marigold ref to readme --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index ad31138..fbbc8ec 100644 --- a/README.md +++ b/README.md @@ -198,3 +198,16 @@ ZoeDepth : copyright = {arXiv.org perpetual, non-exclusive license} } ``` + +Marigold - Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation: + +``` +@misc{ke2023repurposing, + title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation}, + author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler}, + year={2023}, + eprint={2312.02145}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` From 4e8a81a725096210633f1151f254ebd12379dc8d Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:29:46 +0100 Subject: [PATCH 7/8] Changelog marigold --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 955e751..42778ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ ## Changelog +### 0.4.5 + * Support for [Marigold](https://marigoldmonodepth.github.io). [PR #385](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/385). ### 0.4.4 * Compatibility with stable-diffusion-webui 1.6.0 ### 0.4.3 video processing tab From 548cd73397dc88a9045821c316eedd88bf5e4719 Mon Sep 17 00:00:00 2001 From: Andres Romero Date: Thu, 14 Dec 2023 17:30:17 +0100 Subject: [PATCH 8/8] Marigold requirements --- install.py | 5 +++++ requirements.txt | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/install.py b/install.py index 33dbeeb..0b54f5e 100644 --- a/install.py +++ b/install.py @@ -38,6 +38,11 @@ def ensure(module_name, min_version=None): launch.run_pip('install "moviepy==1.0.2"', "moviepy requirement for depthmap script") ensure('transforms3d', '0.4.1') +ensure('transformers', '4.32.1') +ensure('xformers', '0.0.21') +ensure('accelerate', '0.22.0') +ensure('diffusers', '0.20.1') + ensure('imageio') # 2.4.1 try: # Dirty hack to not reinstall every time importlib_metadata.version('imageio-ffmpeg') diff --git a/requirements.txt b/requirements.txt index cb063bf..b222d85 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,5 +16,9 @@ transforms3d>=0.4.1 imageio>=2.4.1,<3.0 imageio-ffmpeg networkx>=2.5 +transformers>=4.32.1 # For Marigold +xformers==0.0.21 # For Marigold +accelerate>=0.22.0 # For Marigold +diffusers>=0.20.1 # For Marigold pyqt5; sys_platform == 'windows' pyqt6; sys_platform != 'windows'