Skip to content

Commit 80ff29a

Browse files
committed
Merge pull request #3132 from asmorkalov:as/stereo_prefilter
2 parents eed990a + 412028c commit 80ff29a

File tree

3 files changed

+116
-15
lines changed

3 files changed

+116
-15
lines changed

modules/cudastereo/src/cuda/stereobm.cu

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -403,23 +403,25 @@ namespace cv { namespace cuda { namespace device
403403
callers[winsz2](left, right, disp, maxdisp, stream);
404404
}
405405

406+
// Limits x to the interval [a, b]: x is first capped from above by b and the
// result is then raised to at least a (so a wins if the bounds are inverted).
__device__ inline int clamp(int x, int a, int b)
{
    const int capped = ::min(b, x);
    return ::max(a, capped);
}
410+
406411
//////////////////////////////////////////////////////////////////////////////////////////////////
407412
/////////////////////////////////////// Sobel Prefilter //////////////////////////////////////////
408413
//////////////////////////////////////////////////////////////////////////////////////////////////
409414

410-
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
411-
412-
__global__ void prefilter_kernel(PtrStepSzb output, int prefilterCap)
415+
__global__ void prefilter_kernel_xsobel(PtrStepSzb input, PtrStepSzb output, int prefilterCap)
413416
{
414417
int x = blockDim.x * blockIdx.x + threadIdx.x;
415418
int y = blockDim.y * blockIdx.y + threadIdx.y;
416419

417420
if (x < output.cols && y < output.rows)
418421
{
419-
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
420-
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
421-
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
422-
422+
int conv = input.ptr(::max(0,y-1))[::max(0,x-1)] * (-1) + input.ptr(::max(0, y-1))[::min(x+1, input.cols-1)] * (1) +
423+
input.ptr(y )[::max(0,x-1)] * (-2) + input.ptr(y )[::min(x+1, input.cols-1)] * (2) +
424+
input.ptr(::min(y+1, input.rows-1))[::max(0,x-1)] * (-1) + input.ptr(::min(y+1, input.rows-1))[::min(x+1,input.cols-1)] * (1);
423425

424426
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
425427
output.ptr(y)[x] = conv & 0xFF;
@@ -428,22 +430,65 @@ namespace cv { namespace cuda { namespace device
428430

429431
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
430432
{
431-
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
432-
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
433-
434433
dim3 threads(16, 16, 1);
435434
dim3 grid(1, 1, 1);
436435

437436
grid.x = divUp(input.cols, threads.x);
438437
grid.y = divUp(input.rows, threads.y);
439438

440-
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
439+
prefilter_kernel_xsobel<<<grid, threads, 0, stream>>>(input, output, prefilterCap);
441440
cudaSafeCall( cudaGetLastError() );
442441

443442
if (stream == 0)
444443
cudaSafeCall( cudaDeviceSynchronize() );
444+
}
445445

446-
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
446+
//////////////////////////////////////////////////////////////////////////////////////////////////
447+
/////////////////////////////////////// Norm Prefilter //////////////////////////////////////////
448+
//////////////////////////////////////////////////////////////////////////////////////////////////
449+
450+
// Normalized-response prefilter kernel; each thread produces one output pixel.
//
// For pixel (x, y) it computes
//   cov1 = 4 * centre + left + right + up + down      (5-point cross)
//   cov2 = sum of all pixels in the square window spanning
//          [-winsize/2, +winsize/2] around the pixel in both directions
//   res  = (cov1 * scale_g - cov2 * scale_s) >> 10
// then clamps res to [-prefilterCap, prefilterCap], adds prefilterCap and
// stores it in output. Coordinates that fall outside the image are replaced
// by the nearest valid row/column. Threads mapped outside the image do nothing.
__global__ void prefilter_kernel_norm(PtrStepSzb input, PtrStepSzb output, int prefilterCap, int scale_g, int scale_s, int winsize)
{
    // prefilterCap in range 1..63, checked in StereoBMImpl::compute
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    const int cols = input.cols;
    const int rows = input.rows;
    const int WSZ2 = winsize / 2;

    if (x >= cols || y >= rows)
        return;

    // Border-clamped coordinates of the four direct neighbours.
    const int xLeft  = ::max(x - 1, 0);
    const int xRight = ::min(x + 1, cols - 1);
    const int yUp    = ::max(y - 1, 0);
    const int yDown  = ::min(y + 1, rows - 1);

    // The horizontal arm must sample both x-1 and x+1; reading the right
    // neighbour twice would make the response asymmetric.
    const int cov1 =                             input.ptr(yUp)[x]   * 1 +
                     input.ptr(y)[xLeft] * 1 +   input.ptr(y)[x]     * 4 + input.ptr(y)[xRight] * 1 +
                                                 input.ptr(yDown)[x] * 1;

    // Plain sum over the window, with out-of-image samples clamped to the border.
    int cov2 = 0;
    for (int i = -WSZ2; i < WSZ2 + 1; i++)
        for (int j = -WSZ2; j < WSZ2 + 1; j++)
            cov2 += input.ptr(clamp(y + i, 0, rows - 1))[clamp(x + j, 0, cols - 1)];

    // scale_g / scale_s are fixed-point weights; >>10 removes the scaling.
    int res = (cov1 * scale_g - cov2 * scale_s) >> 10;
    res = clamp(res, -prefilterCap, prefilterCap) + prefilterCap;
    output.ptr(y)[x] = res;
}
475+
476+
// Host-side launcher for prefilter_kernel_norm.
//
// input / output : source image and destination buffer; the launch grid is
//                  sized from input.cols x input.rows with 16x16 thread blocks.
// prefilterCap   : forwarded to the kernel unchanged.
// winsize        : prefilter window size; also used to derive the two
//                  fixed-point weights scale_g and scale_s passed to the kernel.
// stream         : stream the kernel is enqueued on; when it is the default
//                  stream (0) the call blocks until the device is idle.
void prefilter_norm(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, int winsize, cudaStream_t & stream)
{
    const dim3 threads(16, 16, 1);
    const dim3 grid(divUp(input.cols, threads.x), divUp(input.rows, threads.y), 1);

    int scale_g = winsize*winsize/8;
    // winsize*winsize/8 truncates to 0 when |winsize| < 3, which would make the
    // division below divide by zero; keep the weight at 1 or more in that case.
    if (scale_g < 1)
        scale_g = 1;

    const int scale_s = (1024 + scale_g)/(scale_g*2);
    scale_g *= scale_s;

    prefilter_kernel_norm<<<grid, threads, 0, stream>>>(input, output, prefilterCap, scale_g, scale_s, winsize);
    // Surfaces launch-configuration errors of the kernel call above.
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
448493

449494

modules/cudastereo/src/stereobm.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ namespace cv { namespace cuda { namespace device
5757
{
5858
void stereoBM_CUDA(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
5959
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
60+
void prefilter_norm(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, int winsize, cudaStream_t & stream);
6061
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream);
6162
}
6263
}}}
@@ -92,8 +93,8 @@ namespace
9293
int getPreFilterType() const { return preset_; }
9394
void setPreFilterType(int preFilterType) { preset_ = preFilterType; }
9495

95-
int getPreFilterSize() const { return 0; }
96-
void setPreFilterSize(int /*preFilterSize*/) {}
96+
int getPreFilterSize() const { return preFilterSize_; }
97+
void setPreFilterSize(int preFilterSize) { preFilterSize_ = preFilterSize; }
9798

9899
int getPreFilterCap() const { return preFilterCap_; }
99100
void setPreFilterCap(int preFilterCap) { preFilterCap_ = preFilterCap; }
@@ -119,12 +120,13 @@ namespace
119120
int winSize_;
120121
int preFilterCap_;
121122
float avergeTexThreshold_;
123+
int preFilterSize_;
122124

123125
GpuMat minSSD_, leBuf_, riBuf_;
124126
};
125127

126128
StereoBMImpl::StereoBMImpl(int numDisparities, int blockSize)
127-
: preset_(0), ndisp_(numDisparities), winSize_(blockSize), preFilterCap_(31), avergeTexThreshold_(3)
129+
: preset_(-1), ndisp_(numDisparities), winSize_(blockSize), preFilterCap_(31), avergeTexThreshold_(3), preFilterSize_(9)
128130
{
129131
}
130132

@@ -169,6 +171,17 @@ namespace
169171
le_for_bm = leBuf_;
170172
ri_for_bm = riBuf_;
171173
}
174+
else if(preset_ == cv::StereoBM::PREFILTER_NORMALIZED_RESPONSE)
175+
{
176+
cuda::ensureSizeIsEnough(left.size(), left.type(), leBuf_);
177+
cuda::ensureSizeIsEnough(right.size(), right.type(), riBuf_);
178+
179+
prefilter_norm( left, leBuf_, preFilterCap_, preFilterSize_, stream);
180+
prefilter_norm(right, riBuf_, preFilterCap_, preFilterSize_, stream);
181+
182+
le_for_bm = leBuf_;
183+
ri_for_bm = riBuf_;
184+
}
172185

173186
stereoBM_CUDA(le_for_bm, ri_for_bm, disparity, ndisp_, winSize_, minSSD_, stream);
174187

modules/cudastereo/test/test_stereo.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,49 @@ CUDA_TEST_P(StereoBM, Regression)
7979
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
8080
}
8181

82+
// Regression test for the X-Sobel prefilter path: runs the CUDA block matcher
// (128 disparities, block size 19) on the aloe stereo pair and compares the
// disparity map against the stored reference image with zero tolerance.
CUDA_TEST_P(StereoBM, PrefilterXSobelRegression)
{
    // Fixtures: the stereo pair plus the expected disparity image.
    cv::Mat left_image  = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
    cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
    cv::Mat disp_gold   = readImage("stereobm/aloe-disp-prefilter-xsobel.png", cv::IMREAD_GRAYSCALE);

    // Abort the test if any fixture failed to load.
    ASSERT_FALSE(left_image.empty());
    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());

    cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
    bm->setPreFilterType(cv::StereoBM::PREFILTER_XSOBEL);

    cv::cuda::GpuMat d_left  = loadMat(left_image);
    cv::cuda::GpuMat d_right = loadMat(right_image);
    cv::cuda::GpuMat disp;
    bm->compute(d_left, d_right, disp);

    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
100+
101+
// Regression test for the normalized-response prefilter path: runs the CUDA
// block matcher (128 disparities, block size 19, prefilter size 9) on the aloe
// stereo pair and compares the disparity map against the stored reference
// image with zero tolerance. The test only reads fixtures; it writes no files.
CUDA_TEST_P(StereoBM, PrefilterNormRegression)
{
    cv::Mat left_image  = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
    cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
    cv::Mat disp_gold   = readImage("stereobm/aloe-disp-prefilter-norm.png", cv::IMREAD_GRAYSCALE);

    // Abort the test if any fixture failed to load.
    ASSERT_FALSE(left_image.empty());
    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());

    cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
    cv::cuda::GpuMat disp;

    bm->setPreFilterType(cv::StereoBM::PREFILTER_NORMALIZED_RESPONSE);
    bm->setPreFilterSize(9);
    bm->compute(loadMat(left_image), loadMat(right_image), disp);

    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
124+
82125
INSTANTIATE_TEST_CASE_P(CUDA_Stereo, StereoBM, ALL_DEVICES);
83126

84127
//////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)