@@ -89,20 +89,19 @@ def ui(self, is_img2img):
8989 gen_stereo = gr .Checkbox (label = "Generate Stereo side-by-side image" ,value = False )
9090 gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
9191 with gr .Row ():
92- stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
93- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
92+ stereo_divergence = gr .Slider (minimum = 0.05 , maximum = 10.005 , step = 0.01 , label = 'Divergence (3D effect)' , value = 2.5 )
9493 with gr .Row ():
95- stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal ' , 'soft_horizontal' ], value = 'soft_horizontal ' , type = "index" , elem_id = "stereo_fill_type" )
94+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'naive ' , 'naive_interpolating' , 'polylines_soft' , 'polylines_sharp' ], value = 'naive_interpolating ' , type = "index" , elem_id = "stereo_fill_type" )
9695 stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
9796
9897
9998 with gr .Box ():
10099 gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
101100
102- return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ]
101+ return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ]
103102
104103 # run from script in txt2img or img2img
105- def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
104+ def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ):
106105
107106 # sd process
108107 processed = processing .process_images (p )
@@ -116,13 +115,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
116115 continue
117116 inputimages .append (processed .images [count ])
118117
119- newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
118+ newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance )
120119 for img in newmaps :
121120 processed .images .append (img )
122121
123122 return processed
124123
125- def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
124+ def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ):
126125
127126 # unload sd model
128127 shared .sd_model .cond_stage_model .to (devices .cpu )
@@ -250,7 +249,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
250249 # iterate over input (generated) images
251250 numimages = len (inputimages )
252251 for count in trange (0 , numimages ):
253-
252+
254253 #if numimages > 1:
255254 # print("\nDepthmap", count+1, '/', numimages)
256255 print ('\n ' )
@@ -357,13 +356,12 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
357356 if gen_stereo or gen_anaglyph :
358357 print ("Generating Stereo image.." )
359358 #img_output = cv2.blur(img_output, (3, 3))
360- deviation = calculate_total_deviation (stereo_ipd , stereo_size , inputimages [count ].width )
361359 balance = (stereo_balance + 1 ) / 2
362360 original_image = np .asarray (inputimages [count ])
363361 left_image = original_image if balance < 0.001 else \
364- apply_stereo_deviation (original_image , img_output , - deviation * balance , stereo_fill )
362+ apply_stereo_divergence (original_image , img_output , - stereo_divergence * balance , stereo_fill )
365363 right_image = original_image if balance > 0.999 else \
366- apply_stereo_deviation (original_image , img_output , deviation * (1 - balance ), stereo_fill )
364+ apply_stereo_divergence (original_image , img_output , stereo_divergence * (1 - balance ), stereo_fill )
367365 stereo_img = np .hstack ([left_image , right_image ])
368366
369367 if gen_stereo :
@@ -407,35 +405,35 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
407405
408406 return outimages
409407
410- def calculate_total_deviation (ipd , monitor_w , image_width ):
411- deviation_cm = ipd * 0.12
412- deviation = deviation_cm * monitor_w * (image_width / 1920 )
413- print ("deviation:" , deviation )
414- return deviation
415-
416- @njit
417- def apply_stereo_deviation (original_image , depth , deviation , fill_technique ):
418- h , w , c = original_image .shape
419-
def apply_stereo_divergence(original_image, depth, divergence, fill_technique):
    """Build one eye's view by shifting pixels horizontally according to depth.

    The depth map is rescaled to the [0, 1] range using its own minimum and
    maximum, the divergence is converted from percent of image width to
    pixels, and the work is delegated to the implementation selected by
    ``fill_technique``.

    Args:
        original_image: Image array; ``original_image.shape[1]`` is used as
            the image width.
        depth: Depth array for the same image (NumPy array providing
            ``min()`` / ``max()``).
        divergence: Shift strength in percent of the image width. The caller
            passes a negative value for one eye and a positive one for the
            other.
        fill_technique: Index of the gap fill technique as produced by the
            "Gap fill technique" dropdown: 0 (none), 1 (naive) and
            2 (naive_interpolating) use the naive implementation;
            3 (polylines_soft) and 4 (polylines_sharp) use the polylines one.

    Returns:
        The image returned by the selected implementation.

    Raises:
        ValueError: If ``fill_technique`` is not one of the indices above.
            Previously this case silently returned ``None``.
    """
    if fill_technique in (0, 1, 2):
        implementation = apply_stereo_divergence_naive
    elif fill_technique in (3, 4):
        implementation = apply_stereo_divergence_polylines
    else:
        raise ValueError(
            f"Unknown gap fill technique index: {fill_technique!r} (expected 0..4)"
        )

    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        normalized_depth = (depth - depth_min) / depth_range
    else:
        # A perfectly flat depth map would make the division produce NaN
        # (0 / 0), which then poisons every per-pixel shift downstream.
        # Treat it as a constant plane instead: every pixel gets the same shift.
        normalized_depth = np.zeros_like(depth, dtype=np.float64)

    # Divergence is given in percent of the image width; convert to pixels.
    divergence_px = (divergence / 100.0) * original_image.shape[1]

    return implementation(original_image, normalized_depth, divergence_px, fill_technique)
418+
419+ @njit
420+ def apply_stereo_divergence_naive (original_image , normalized_depth , divergence_px : float , fill_technique ):
421+ h , w , c = original_image .shape
423422
424423 derived_image = np .zeros_like (original_image )
425424 filled = np .zeros (h * w , dtype = np .uint8 )
426425
427- for row in range (h ):
426+ for row in prange (h ):
428427 # Swipe order should ensure that pixels that are closer overwrite
429428 # (at their destination) pixels that are less close
430- for col in range (w ) if deviation < 0 else range (w - 1 , - 1 , - 1 ):
431- col_d = col + int ((1 - depth [row ][col ] ** 2 ) * deviation )
432- # col_d = col + int((1 - depth[row][col]) * deviation)
429+ for col in range (w ) if divergence_px < 0 else range (w - 1 , - 1 , - 1 ):
430+ col_d = col + int ((1 - normalized_depth [row ][col ] ** 2 ) * divergence_px )
433431 if 0 <= col_d < w :
434432 derived_image [row ][col_d ] = original_image [row ][col ]
435433 filled [row * w + col_d ] = 1
436434
437435 # Fill the gaps
438- if fill_technique == 2 : # soft_horizontal
436+ if fill_technique == 2 : # naive_interpolating
439437 for row in range (h ):
440438 for l_pointer in range (w ):
441439 # This if (and the next if) performs two checks that are almost the same - for performance reasons
@@ -444,7 +442,7 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
444442 l_border = derived_image [row ][l_pointer - 1 ] if l_pointer > 0 else np .zeros (3 , dtype = np .uint8 )
445443 r_border = np .zeros (3 , dtype = np .uint8 )
446444 r_pointer = l_pointer + 1
447- while r_pointer != w :
445+ while r_pointer < w :
448446 if sum (derived_image [row ][r_pointer ]) != 0 and filled [row * w + r_pointer ]:
449447 r_border = derived_image [row ][r_pointer ]
450448 break
@@ -453,30 +451,169 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
453451 l_border = r_border
454452 elif sum (r_border ) == 0 :
455453 r_border = l_border
454+ # Example illustrating positions of pointers at this point in code:
455+ # is filled? : + - - - - +
456+ # pointers : l r
457+ # interpolated: 0 1 2 3 4 5
458+ # In total: 5 steps between two filled pixels
456459 total_steps = 1 + r_pointer - l_pointer
457460 step = (r_border .astype (np .float_ ) - l_border ) / total_steps
458461 for col in range (l_pointer , r_pointer ):
459462 derived_image [row ][col ] = l_border + (step * (col - l_pointer + 1 )).astype (np .uint8 )
460463 return derived_image
461- elif fill_technique == 1 : # hard_horizontal
464+ elif fill_technique == 1 : # naive
462465 derived_fix = np .copy (derived_image )
463466 for pos in np .where (filled == 0 )[0 ]:
464467 row = pos // w
465468 col = pos % w
466- for offset in range (1 , abs (int (deviation )) + 2 ):
469+ row_times_w = row * w
470+ for offset in range (1 , abs (int (divergence_px )) + 2 ):
467471 r_offset = col + offset
468472 l_offset = col - offset
469- if r_offset < w and filled [row * w + r_offset ]:
473+ if r_offset < w and filled [row_times_w + r_offset ]:
470474 derived_fix [row ][col ] = derived_image [row ][r_offset ]
471475 break
472- if 0 <= l_offset and filled [row * w + l_offset ]:
476+ if 0 <= l_offset and filled [row_times_w + l_offset ]:
473477 derived_fix [row ][col ] = derived_image [row ][l_offset ]
474478 break
475479 return derived_fix
476480 else : # none
477481 return derived_image
478482
479- @njit (parallel = True )
@njit(fastmath=True, parallel=True)
def apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px: float, fill_technique):
    """Shift an image horizontally by depth, treating each row as a polyline.

    For every row a list of segments is generated, each segment is displaced
    according to the depth of the pixels it connects, the segments are sorted
    by their new starting coordinate and the row is rasterized by sampling
    every output pixel several times and picking, for each sample, the
    segment with the largest displacement covering that position.

    Args:
        original_image: 3-D image array unpacked as ``(h, w, c)``.
        normalized_depth: 2-D array indexed as ``[row][col]``; the caller
            rescales it to [0, 1] before calling.
        divergence_px: Maximum horizontal shift in pixels; the sign selects
            the shift direction.
        fill_technique: 3 selects "polylines_soft" (one segment between each
            pair of adjacent pixels, 3 samples per output pixel); 4 selects
            "polylines_sharp" (one segment per pixel plus one per gap between
            adjacent pixels, 5 samples per output pixel).

    Returns:
        A new array with the same shape and dtype as ``original_image``.
    """
    EPSILON = 1e-7
    h, w, c = original_image.shape
    derived_image = np.zeros_like(original_image)
    # Sub-pixel sample positions used when rasterizing each output pixel.
    SAMPLES = [1 / 6, 3 / 6, 5 / 6] if fill_technique == 3 else [0.1, 0.3, 0.5, 0.7, 0.9]

    for row in prange(h):
        # Generating the polyline.
        # Format of each segment:
        #   [0] new coordinate of the first point,  [1] its divergence,
        #   [2] new coordinate of the second point, [3] its divergence,
        #   [4] original column of the first pixel,
        #   [5] original column of the second pixel.
        # It is not guaranteed that the first point is the left one.
        sg = np.zeros((0, 6), dtype=np.float64)
        sg_end = 0
        if fill_technique == 3:  # polylines_soft
            sg = np.zeros((w + 3, 6), dtype=np.float64)
            # Sentinel segment covering everything left of the image; its
            # second coordinate is a placeholder patched when col == 0.
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0.0, 0.0]
            sg_end += 1
            for col in range(0, w - 1):
                ld = (1 - normalized_depth[row][col] ** 2) * divergence_px
                rd = (1 - normalized_depth[row][col + 1] ** 2) * divergence_px
                lx, rx = ld + col, rd + (col + 1)
                sg[sg_end] = [lx, abs(ld), rx, abs(rd), float(col), float(col + 1)]
                sg_end += 1
                if col == 0:
                    sg[0][2] = sg[1][0] + EPSILON
            # Sentinel segment covering everything right of the image.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1
        if fill_technique == 4:  # polylines_sharp
            PIXEL_HALF_WIDTH = 0.45
            sg = np.zeros((2 * w + 5, 6), dtype=np.float64)
            # Sentinel segment covering everything left of the image; its
            # second coordinate is a placeholder patched when col == 0.
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0, 0]
            sg_end += 1
            for col in range(0, w):
                d = (1 - normalized_depth[row][col] ** 2) * divergence_px
                center = col + d
                fx = center - PIXEL_HALF_WIDTH - EPSILON
                sx = center + PIXEL_HALF_WIDTH + EPSILON

                if col == 0:
                    sg[0][2] = fx + EPSILON
                else:
                    # Each space between two adjacent pixels gets a segment.
                    sg[sg_end] = [(sg[sg_end - 1][0] + sg[sg_end - 1][2]) / 2, sg[sg_end - 1][3] - EPSILON,
                                  center, abs(d) - EPSILON,
                                  col - 1, col]
                    sg_end += 1

                # Each pixel gets a segment.
                sg[sg_end] = [fx, abs(d), sx, abs(d), col, col]
                sg_end += 1

            # Sentinel segment covering everything right of the image.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1

        # Sort segments by their first coordinate using insertion sort:
        # the segments are almost sorted to begin with.
        for i in range(1, sg_end):
            u = i - 1
            # The bounds check must come first so that sg[-1] is never read
            # once u has walked past the start of the array.
            while 0 <= u and sg[u][0] > sg[u + 1][0]:
                sg[u], sg[u + 1] = np.copy(sg[u + 1]), np.copy(sg[u])
                u -= 1

        # Rasterizing.
        # Possible improvement: a more accurate logic instead of just
        # sampling a region multiple times.
        # At each column we keep track of segments that are "active".
        # NOTE(review): the capacity below is a heuristic and nothing here
        # checks cs_end against it - confirm it cannot be exceeded.
        cs = np.zeros((5 * int(abs(divergence_px)) + 25, 6), dtype=np.float64)
        cs_end = 0
        seg_pointer = 0
        for col in range(w):
            # Removing from current segments (swap-with-last removal).
            cs_i = 0
            while cs_i < cs_end:
                if cs[cs_i][2] < col:
                    cs[cs_i] = cs[cs_end - 1]
                    cs_end -= 1
                else:
                    cs_i += 1

            # Adding to current segments.
            while seg_pointer < sg_end and sg[seg_pointer][0] < col + 1.0:
                cs[cs_end] = sg[seg_pointer]
                seg_pointer += 1
                cs_end += 1

            # We start with 0.5 because of how floats are converted to ints.
            color = np.full(c, 0.5, dtype=np.float64)
            for sample_i in range(len(SAMPLES)):
                # Finding the segment that is the closest at the position.
                sample = SAMPLES[sample_i]
                pos = col + sample
                best_i = 0
                best_closeness = -1.1
                for cs_i in range(cs_end):
                    # Interpolating; works regardless of whether the first
                    # point is the left point.
                    ip_k = (pos - cs[cs_i][0]) / (cs[cs_i][2] - cs[cs_i][0])
                    closeness = (1.0 - ip_k) * cs[cs_i][1] + ip_k * cs[cs_i][3]
                    if best_closeness < closeness and 0.0 < ip_k < 1.0:
                        best_closeness = closeness
                        best_i = cs_i

                # Getting the color: blend the two source pixels of the
                # winning segment by the interpolation factor.
                col_l, col_r = int(cs[best_i][4] + 0.001), int(cs[best_i][5] + 0.001)
                ip_k = (pos - cs[best_i][0]) / (cs[best_i][2] - cs[best_i][0])
                color += (original_image[row][col_l] * (1.0 - ip_k) + original_image[row][col_r] * ip_k) / len(SAMPLES)

            derived_image[row][col] = np.asarray(color, dtype=np.uint8)

    return derived_image
615+
616+ @njit (parallel = True )
480617def overlap (im1 , im2 ):
481618 width1 = im1 .shape [1 ]
482619 height1 = im1 .shape [0 ]
@@ -523,9 +660,8 @@ def run_generate(depthmap_mode,
523660 combine_output ,
524661 combine_output_axis ,
525662 gen_stereo ,
526- gen_anaglyph ,
527- stereo_ipd ,
528- stereo_size ,
663+ gen_anaglyph ,
664+ stereo_divergence ,
529665 stereo_fill ,
530666 stereo_balance
531667 ):
@@ -564,7 +700,7 @@ def run_generate(depthmap_mode,
564700 outpath = opts .outdir_samples or opts .outdir_extras_samples
565701
566702
567- outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
703+ outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance )
568704
569705 return outputs , plaintext_to_html ('info' ), ''
570706
@@ -614,10 +750,9 @@ def on_ui_tabs():
614750 gen_stereo = gr .Checkbox (label = "Generate Stereo side-by-side image" ,value = False )
615751 gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
616752 with gr .Row ():
617- stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
618- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
753+ stereo_divergence = gr .Slider (minimum = 0.05 , maximum = 10.005 , step = 0.01 , label = 'Divergence (3D effect)' , value = 2.5 )
619754 with gr .Row ():
620- stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal ' , 'soft_horizontal' ], value = 'soft_horizontal ' , type = "index" , elem_id = "stereo_fill_type" )
755+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'naive ' , 'naive_interpolating' , 'polylines_soft' , 'polylines_sharp' ], value = 'naive_interpolating ' , type = "index" , elem_id = "stereo_fill_type" )
621756 stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
622757
623758 with gr .Box ():
@@ -655,9 +790,8 @@ def on_ui_tabs():
655790 combine_output ,
656791 combine_output_axis ,
657792 gen_stereo ,
658- gen_anaglyph ,
659- stereo_ipd ,
660- stereo_size ,
793+ gen_anaglyph ,
794+ stereo_divergence ,
661795 stereo_fill ,
662796 stereo_balance
663797 ],
@@ -1404,7 +1538,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
14041538
14051539 # output
14061540 return cv2 .resize (imageandpatchs .estimation_updated_image , (input_resolution [1 ], input_resolution [0 ]), interpolation = cv2 .INTER_CUBIC )
1407-
1541+
14081542# taken from 3d-photo-inpainting and modified
14091543def sparse_bilateral_filtering (
14101544 depth , image , filter_size , depth_threshold , sigma_s , sigma_r , HR = False , mask = None , gsHR = True , edge_id = None , num_iter = None , num_gs_iter = None , spdb = False
@@ -1443,7 +1577,7 @@ def sparse_bilateral_filtering(
14431577def vis_depth_discontinuity (depth , depth_threshold , vis_diff = False , label = False , mask = None ):
14441578 """
14451579 config:
1446- -
1580+ -
14471581 """
14481582 if label == False :
14491583 disp = 1. / depth
0 commit comments