Skip to content

Commit 1065ec1

Browse files
committed
Introduce separation parameter for stereo image generation
1 parent 90fcc37 commit 1065ec1

File tree

2 files changed

+35
-21
lines changed

2 files changed

+35
-21
lines changed

scripts/depthmap.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ def main_ui_panel(is_depth_tab):
147147
with gr.Row(visible=False) as stereo_options_row_1:
148148
stereo_divergence = gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)',
149149
value=2.5)
150+
stereo_separation = gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, label='Separation (moves images apart)',
151+
value=0.0)
150152
with gr.Row(visible=False) as stereo_options_row_2:
151153
stereo_fill = gr.Dropdown(label="Gap fill technique",
152154
choices=['none', 'naive', 'naive_interpolating', 'polylines_soft',
@@ -266,7 +268,7 @@ def background_removal_options_visibility(v):
266268
outputs=[bgrem_options_row_1, bgrem_options_row_2]
267269
)
268270

269-
return [compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical]
271+
return [compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical]
270272

271273

272274
class Script(scripts.Script):
@@ -283,7 +285,7 @@ def ui(self, is_img2img):
283285

284286
# run from script in txt2img or img2img
285287
def run(self, p,
286-
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical
288+
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical
287289
):
288290

289291
# sd process
@@ -309,7 +311,7 @@ def run(self, p,
309311

310312
newmaps, mesh_fi, meshsimple_fi = run_depthmap(processed, p.outpath_samples, inputimages, None,
311313
compute_device, model_type,
312-
net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
314+
net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
313315
background_removed_images, "mp4", 0, False, None, False, gen_mesh, mesh_occlude, mesh_spherical )
314316

315317
for img in newmaps:
@@ -328,7 +330,7 @@ def reload_sd_model():
328330
shared.sd_model.first_stage_model.to(devices.device)
329331

330332
def run_depthmap(processed, outpath, inputimages, inputnames,
331-
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
333+
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
332334
background_removed_images, fnExt, vid_ssaa, custom_depthmap, custom_depthmap_img, depthmap_batch_reuse, gen_mesh, mesh_occlude, mesh_spherical):
333335

334336
if len(inputimages) == 0 or inputimages[0] == None:
@@ -687,7 +689,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames,
687689
try:
688690
images.save_image(Image.fromarray(img_output), outpath, "", processed.all_seeds[count], processed.all_prompts[count], opts.samples_format, info=info, p=processed, suffix="_depth")
689691
except ValueError as ve:
690-
if not 'image has wrong mode' in str(ve): raise ve
692+
if not ('image has wrong mode' in str(ve) or 'cannot write mode I;16 as JPEG' in str(ve)): raise ve
691693
else:
692694
images.save_image(Image.fromarray(img_output2), outpath, "", processed.all_seeds[count], processed.all_prompts[count], opts.samples_format, info=info, p=processed, suffix="_depth")
693695
elif save_depth:
@@ -697,7 +699,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames,
697699
try:
698700
images.save_image(Image.fromarray(img_output), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None)
699701
except ValueError as ve:
700-
if not 'image has wrong mode' in str(ve): raise ve
702+
if not ('image has wrong mode' in str(ve) or 'cannot write mode I;16 as JPEG' in str(ve)): raise ve
701703
else:
702704
images.save_image(Image.fromarray(img_output2), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None)
703705
else:
@@ -714,7 +716,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames,
714716
print("Generating stereoscopic images..")
715717

716718
stereomodes = stereo_modes
717-
stereoimages = create_stereoimages(inputimages[count], img_output, stereo_divergence, stereomodes, stereo_balance, stereo_fill)
719+
stereoimages = create_stereoimages(inputimages[count], img_output, stereo_divergence, stereo_separation, stereomodes, stereo_balance, stereo_fill)
718720

719721
for c in range(0, len(stereoimages)):
720722
outimages.append(stereoimages[c])
@@ -1138,6 +1140,7 @@ def run_generate(depthmap_mode,
11381140
gen_stereo,
11391141
stereo_modes,
11401142
stereo_divergence,
1143+
stereo_separation,
11411144
stereo_fill,
11421145
stereo_balance,
11431146
inpaint,
@@ -1205,7 +1208,7 @@ def run_generate(depthmap_mode,
12051208

12061209
outputs, mesh_fi, meshsimple_fi = run_depthmap(
12071210
None, outpath, imageArr, imageNameArr,
1208-
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
1211+
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal,
12091212
background_removed_images, fnExt, vid_ssaa, custom_depthmap, custom_depthmap_img, depthmap_batch_reuse, gen_mesh, mesh_occlude, mesh_spherical)
12101213

12111214
# use inpainted 3d mesh to show in 3d model output when enabled in settings
@@ -1264,7 +1267,7 @@ def on_ui_tabs():
12641267
submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
12651268

12661269
# insert main panel
1267-
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical = main_ui_panel(True)
1270+
compute_device, model_type, net_width, net_height, match_size, boost, invert_depth, clipdepth, clipthreshold_far, clipthreshold_near, combine_output, combine_output_axis, save_depth, show_depth, show_heat, gen_stereo, stereo_modes, stereo_divergence, stereo_separation, stereo_fill, stereo_balance, inpaint, inpaint_vids, background_removal, save_background_removal_masks, gen_normal, pre_depth_background_removal, background_removal_model, gen_mesh, mesh_occlude, mesh_spherical = main_ui_panel(True)
12681271

12691272
unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
12701273

@@ -1357,6 +1360,7 @@ def custom_depthmap_visibility(v):
13571360
gen_stereo,
13581361
stereo_modes,
13591362
stereo_divergence,
1363+
stereo_separation,
13601364
stereo_fill,
13611365
stereo_balance,
13621366
inpaint,

scripts/stereoimage_generation.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
from PIL import Image
44

5-
def create_stereoimages(original_image, depthmap, divergence, modes=None, stereo_balance=0.0,
5+
def create_stereoimages(original_image, depthmap, divergence, separation=0.0, modes=None, stereo_balance=0.0,
66
fill_technique='polylines_sharp'):
77
"""Creates stereoscopic images.
88
An effort is made to make them look nice, but beware that the resulting image will have some distortion.
@@ -11,6 +11,9 @@ def create_stereoimages(original_image, depthmap, divergence, modes=None, stereo
1111
:param depthmap: depthmap corresponding to the original image. White = near, black = far.
1212
:param float divergence: the measure of 3D effect, in percentages.
1313
A good value will likely be somewhere in the [0.05; 10.0) interval.
14+
:param float separation: measure of how far to move the two halves of the stereoimage apart from each other.
15+
Measured in percentages. Negative values move the two parts closer together.
16+
Affects which parts of the image will be visible in left and/or right half.
1417
:param list modes: what the result will look like. By default only 'left-right' is generated
1518
- a picture for the left eye will be on the left and the picture for the right eye - on the right.
1619
The supported modes are: 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'red-cyan-anaglyph'.
@@ -28,9 +31,9 @@ def create_stereoimages(original_image, depthmap, divergence, modes=None, stereo
2831
original_image = np.asarray(original_image)
2932
balance = (stereo_balance + 1) / 2
3033
left_eye = original_image if balance < 0.001 else \
31-
apply_stereo_divergence(original_image, depthmap, +1 * divergence * balance, fill_technique)
34+
apply_stereo_divergence(original_image, depthmap, +1 * divergence * balance, -1 * separation, fill_technique)
3235
right_eye = original_image if balance > 0.999 else \
33-
apply_stereo_divergence(original_image, depthmap, -1 * divergence * (1 - balance), fill_technique)
36+
apply_stereo_divergence(original_image, depthmap, -1 * divergence * (1 - balance), separation, fill_technique)
3437

3538
results = []
3639
for mode in modes:
@@ -49,20 +52,26 @@ def create_stereoimages(original_image, depthmap, divergence, modes=None, stereo
4952
return [Image.fromarray(r) for r in results]
5053

5154

52-
def apply_stereo_divergence(original_image, depth, divergence, fill_technique):
55+
def apply_stereo_divergence(original_image, depth, divergence, separation, fill_technique):
5356
depth_min = depth.min()
5457
depth_max = depth.max()
5558
normalized_depth = (depth - depth_min) / (depth_max - depth_min)
5659
divergence_px = (divergence / 100.0) * original_image.shape[1]
60+
separation_px = (separation / 100.0) * original_image.shape[1]
5761

5862
if fill_technique in ['none', 'naive', 'naive_interpolating']:
59-
return apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px, fill_technique)
63+
return apply_stereo_divergence_naive(
64+
original_image, normalized_depth, divergence_px, separation_px, fill_technique
65+
)
6066
if fill_technique in ['polylines_soft', 'polylines_sharp']:
61-
return apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px, fill_technique)
67+
return apply_stereo_divergence_polylines(
68+
original_image, normalized_depth, divergence_px, separation_px, fill_technique
69+
)
6270

6371

6472
@njit
65-
def apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px: float, fill_technique):
73+
def apply_stereo_divergence_naive(
74+
original_image, normalized_depth, divergence_px: float, separation_px: float, fill_technique):
6675
h, w, c = original_image.shape
6776

6877
derived_image = np.zeros_like(original_image)
@@ -72,7 +81,7 @@ def apply_stereo_divergence_naive(original_image, normalized_depth, divergence_p
7281
# Swipe order should ensure that pixels that are closer overwrite
7382
# (at their destination) pixels that are less close
7483
for col in range(w) if divergence_px < 0 else range(w - 1, -1, -1):
75-
col_d = col + int((normalized_depth[row][col] ** 2) * divergence_px)
84+
col_d = col + int((normalized_depth[row][col] ** 2) * divergence_px + separation_px)
7685
if 0 <= col_d < w:
7786
derived_image[row][col_d] = original_image[row][col]
7887
filled[row * w + col_d] = 1
@@ -127,7 +136,8 @@ def apply_stereo_divergence_naive(original_image, normalized_depth, divergence_p
127136

128137

129138
@njit(parallel=True) # fastmath=True does not reasonably improve performance
130-
def apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px: float, fill_technique):
139+
def apply_stereo_divergence_polylines(
140+
original_image, normalized_depth, divergence_px: float, separation_px: float, fill_technique):
131141
# This code treats rows of the image as polylines
132142
# It generates polylines, morphs them (applies divergence) to them, and then rasterizes them
133143
EPSILON = 1e-7
@@ -141,19 +151,19 @@ def apply_stereo_divergence_polylines(original_image, normalized_depth, divergen
141151
# format: new coordinate of the vertex, divergence (closeness), column of pixel that contains the point's color
142152
pt = np.zeros((5 + 2 * w, 3), dtype=np.float_)
143153
pt_end: int = 0
144-
pt[pt_end] = [-3.0 * abs(divergence_px), 0.0, 0.0]
154+
pt[pt_end] = [-1.0 * w, 0.0, 0.0]
145155
pt_end += 1
146156
for col in range(0, w):
147157
coord_d = (normalized_depth[row][col] ** 2) * divergence_px
148-
coord_x = col + 0.5 + coord_d
158+
coord_x = col + 0.5 + coord_d + separation_px
149159
if PIXEL_HALF_WIDTH < EPSILON:
150160
pt[pt_end] = [coord_x, abs(coord_d), col]
151161
pt_end += 1
152162
else:
153163
pt[pt_end] = [coord_x - PIXEL_HALF_WIDTH, abs(coord_d), col]
154164
pt[pt_end + 1] = [coord_x + PIXEL_HALF_WIDTH, abs(coord_d), col]
155165
pt_end += 2
156-
pt[pt_end] = [w + 3.0 * abs(divergence_px), 0.0, w - 1]
166+
pt[pt_end] = [2.0 * w, 0.0, w - 1]
157167
pt_end += 1
158168

159169
# generating the segments of the morphed polyline

0 commit comments

Comments
 (0)