1111from modules .processing import create_infotext , process_images , Processed
1212from modules .shared import opts , cmd_opts , state , Options
1313from modules import script_callbacks
14+ from numba import njit
1415from torchvision .transforms import Compose , transforms
1516from PIL import Image
1617from pathlib import Path
@@ -85,14 +86,17 @@ def ui(self, is_img2img):
8586 with gr .Row ():
8687 stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
8788 stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
89+ with gr .Row ():
90+ stereo_fill = gr .Checkbox (label = "Improve accuracy" , value = False )
91+ stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
8892
8993 with gr .Box ():
9094 gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
9195
92- return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ]
96+ return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ]
9397
9498 # run from script in txt2img or img2img
95- def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ):
99+ def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
96100
97101 # sd process
98102 processed = processing .process_images (p )
@@ -106,13 +110,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
106110 continue
107111 inputimages .append (processed .images [count ])
108112
109- newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size )
113+ newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
110114 for img in newmaps :
111115 processed .images .append (img )
112116
113117 return processed
114118
115- def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size ):
119+ def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
116120
117121 # unload sd model
118122 shared .sd_model .cond_stage_model .to (devices .cpu )
@@ -331,14 +335,20 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
331335 if gen_stereo or gen_anaglyph :
332336 print ("Generating Stereo image.." )
333337 #img_output = cv2.blur(img_output, (3, 3))
334- left_img = np .asarray (inputimages [count ])
335- right_img = generate_stereo (left_img , img_output , stereo_ipd , stereo_size )
336- stereo_img = np .hstack ([right_img , inputimages [count ]])
338+ deviation = calculate_total_deviation (stereo_ipd , stereo_size , inputimages [count ].width )
339+ balance = (stereo_balance + 1 ) / 2
340+ original_image = np .asarray (inputimages [count ])
341+ left_image = original_image if balance < 0.001 else \
342+ apply_stereo_deviation (original_image , img_output , - deviation * balance , stereo_fill )
343+ right_image = original_image if balance > 0.999 else \
344+ apply_stereo_deviation (original_image , img_output , deviation * (1 - balance ), stereo_fill )
345+ stereo_img = np .hstack ([left_image , right_image ])
346+
337347 if gen_stereo :
338348 outimages .append (stereo_img )
339349 if gen_anaglyph :
340350 print ("Generating Anaglyph image.." )
341- anaglyph_img = overlap (right_img , left_img )
351+ anaglyph_img = overlap (left_image , right_image )
342352 outimages .append (anaglyph_img )
343353 if (processed is not None ):
344354 if gen_stereo :
@@ -375,45 +385,82 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
375385
376386 return outimages
377387
def calculate_total_deviation(ipd, monitor_w, image_width, reference_width=1920, ipd_factor=0.12):
    """Return the total horizontal shift to distribute between the two eye views.

    The value is ``ipd * ipd_factor * monitor_w`` scaled by how wide the image
    is relative to ``reference_width``. It is later multiplied by a per-pixel
    depth term and added to a column index, so it is effectively a shift
    expressed in pixel columns.

    Args:
        ipd: Interpupillary distance (the UI labels this slider "IPD (cm)").
        monitor_w: Screen width (the UI labels this slider "Screen Width (cm)").
        image_width: Width of the image being processed, in pixels.
        reference_width: Image width at which no extra scaling is applied.
            Defaults to 1920, the value that used to be hard-coded.
        ipd_factor: Multiplier applied to ``ipd``. Defaults to 0.12, the value
            that used to be hard-coded (presumably an empirical tuning
            constant; its derivation is not shown in this file).

    Returns:
        The deviation as a float.
    """
    deviation_cm = ipd * ipd_factor
    deviation = deviation_cm * monitor_w * (image_width / reference_width)
    # Kept for parity with the previous console output.
    print("deviation:", deviation)
    return deviation
378393
def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
    """Run ``apply_stereo_deviation_core`` and log a timestamp before and after.

    All four arguments are forwarded unchanged and the core function's result
    is returned as-is. The two ``TIME:`` lines printed to stdout carry the raw
    ``time.time()`` value, so the elapsed time is the difference between them.
    """
    from time import time as now

    print("TIME:", now())
    shifted = apply_stereo_deviation_core(original_image, depth, deviation, fill_technique)
    print("TIME:", now())
    return shifted
379400
380- def generate_stereo (left_img , depth , ipd , monitor_w ):
381- #MONITOR_W = 38.5 #50 #38.5
382- h , w , c = left_img .shape
@njit
def apply_stereo_deviation_core(original_image, depth, deviation, fill_technique):
    """Shift every pixel horizontally according to its depth and fill the gaps.

    Args:
        original_image: Image array indexed as ``[row][col]`` whose shape
            unpacks into (height, width, channels). The gap filling code builds
            its fallback borders with ``np.zeros(3, dtype=np.uint8)``, so it
            presumably expects 3-channel uint8 data - TODO confirm.
        depth: 2-D depth array indexed as ``[row][col]``; it is min-max
            normalised to the 0..1 range before use.
        deviation: Maximum horizontal shift in pixel columns. The sign selects
            the shift direction.
        fill_technique: 2 = soft_horizontal (linear blend across each gap),
            1 = hard_horizontal (copy the nearest written pixel),
            anything else = leave gaps black.

    Returns:
        A new array with the same shape and dtype as ``original_image``.
    """
    h, w, _ = original_image.shape

    # NOTE(review): a perfectly flat depth map (max == min) makes the divisor
    # zero; this is not guarded here.
    depth_min = depth.min()
    depth_max = depth.max()
    depth = (depth - depth_min) / (depth_max - depth_min)

    derived_image = np.zeros_like(original_image)
    # Flat (row * w + col) mask of destination pixels that received a value.
    filled = np.zeros(h * w, dtype=np.uint8)

    for row in range(h):
        # Sweep order is chosen so that pixels that are closer overwrite
        # (at their destination) pixels that are less close.
        for col in range(w) if deviation < 0 else range(w - 1, -1, -1):
            # Quadratic falloff: normalised depth 0 moves by the full
            # deviation, normalised depth 1 does not move at all.
            col_d = col + int((1 - depth[row][col] ** 2) * deviation)
            if 0 <= col_d < w:
                derived_image[row][col_d] = original_image[row][col]
                filled[row * w + col_d] = 1

    # Fill the gaps
    if fill_technique == 2:  # soft_horizontal
        for row in range(h):
            for l_pointer in range(w):
                # This check and the one in the scan below are almost the same
                # on purpose (kept this way for performance reasons): a pixel
                # is skipped when it is non-black OR flagged as filled.
                if sum(derived_image[row][l_pointer]) != 0 or filled[row * w + l_pointer]:
                    continue
                # Left border is the pixel just before the gap (black at column 0).
                l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(3, dtype=np.uint8)
                r_border = np.zeros(3, dtype=np.uint8)
                # Scan right for the first pixel that is non-black AND filled.
                # If there is none, r_pointer ends at w and r_border stays black.
                r_pointer = l_pointer + 1
                while r_pointer != w:
                    if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
                        r_border = derived_image[row][r_pointer]
                        break
                    r_pointer += 1
                # A black border is replaced by the opposite one, so the gap is
                # filled with a constant colour instead of fading to black.
                if sum(l_border) == 0:
                    l_border = r_border
                elif sum(r_border) == 0:
                    r_border = l_border
                total_steps = 1 + r_pointer - l_pointer
                # np.float64 is what the removed np.float_ alias referred to.
                step = (r_border.astype(np.float64) - l_border) / total_steps
                # NOTE(review): the increment is cast to uint8 before the
                # addition, so a decreasing gradient appears to rely on
                # wrap-around arithmetic - confirm.
                for col in range(l_pointer, r_pointer):
                    derived_image[row][col] = l_border + (step * (col - l_pointer + 1)).astype(np.uint8)
        return derived_image
    elif fill_technique == 1:  # hard_horizontal
        # Read from the unmodified shift result, write into a copy, so filled
        # gaps never become sources for other gaps.
        derived_fix = np.copy(derived_image)
        for pos in np.where(filled == 0)[0]:
            row = pos // w
            col = pos % w
            # Look outwards, right side first, no further than the maximum
            # shift (plus a small margin).
            for offset in range(1, abs(int(deviation)) + 2):
                r_offset = col + offset
                l_offset = col - offset
                if r_offset < w and filled[row * w + r_offset]:
                    derived_fix[row][col] = derived_image[row][r_offset]
                    break
                if 0 <= l_offset and filled[row * w + l_offset]:
                    derived_fix[row][col] = derived_image[row][l_offset]
                    break
        return derived_fix
    else:  # none
        return derived_image
417464
418465def overlap (im1 , im2 ):
419466 width1 = im1 .shape [1 ]
@@ -463,7 +510,9 @@ def run_generate(depthmap_mode,
463510 gen_stereo ,
464511 gen_anaglyph ,
465512 stereo_ipd ,
466- stereo_size
513+ stereo_size ,
514+ stereo_fill ,
515+ stereo_balance
467516 ):
468517
469518 imageArr = []
@@ -500,7 +549,7 @@ def run_generate(depthmap_mode,
500549 outpath = opts .outdir_samples or opts .outdir_extras_samples
501550
502551
503- outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size )
552+ outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
504553
505554 return outputs , plaintext_to_html ('info' ), ''
506555
@@ -551,7 +600,10 @@ def on_ui_tabs():
551600 gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
552601 with gr .Row ():
553602 stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
554- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
603+ stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
604+ with gr .Row ():
605+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal' , 'soft_horizontal' ], value = 'soft_horizontal' , type = "index" , elem_id = "stereo_fill_type" )
606+ stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
555607
556608 with gr .Box ():
557609 gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
@@ -590,7 +642,9 @@ def on_ui_tabs():
590642 gen_stereo ,
591643 gen_anaglyph ,
592644 stereo_ipd ,
593- stereo_size
645+ stereo_size ,
646+ stereo_fill ,
647+ stereo_balance
594648 ],
595649 outputs = [
596650 result_images ,
@@ -1212,7 +1266,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
12121266
12131267 # Generate the base estimate using the double estimation.
12141268 whole_estimate = doubleestimate (img , net_receptive_field_size , whole_image_optimal_size , pix2pixsize , model , model_type , pix2pixmodel )
1215-
1269+
12161270 # Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
12171271 # small high-density regions of the image.
12181272 global factor
0 commit comments