Skip to content

Commit 4cba139

Browse files
committed
Keep the cufftPlan2d plans alive across ConvolutionImpl::convolve calls
On some CUDA versions, creating and destroying a cufftPlan2d plan is very time-consuming. We now create the plans in ConvolutionImpl::create() and destroy them in the destructor. This solves issue #3385.
1 parent d6102ef commit 4cba139

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

modules/cudaarithm/src/arithm.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ namespace
440440
{
441441
public:
442442
explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {}
443+
~ConvolutionImpl();
443444

444445
void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null());
445446

@@ -452,6 +453,8 @@ namespace
452453
Size user_block_size;
453454
Size dft_size;
454455

456+
cufftHandle planR2C, planC2R;
457+
455458
GpuMat image_spect, templ_spect, result_spect;
456459
GpuMat image_block, templ_block, result_data;
457460
};
@@ -491,6 +494,15 @@ namespace
491494
// Use maximum result matrix block size for the estimated DFT block size
492495
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
493496
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
497+
498+
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
499+
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
500+
}
501+
502+
ConvolutionImpl::~ConvolutionImpl()
503+
{
504+
cufftSafeCall( cufftDestroy(planR2C) );
505+
cufftSafeCall( cufftDestroy(planC2R) );
494506
}
495507

496508
Size ConvolutionImpl::estimateBlockSize(Size result_size)
@@ -516,10 +528,6 @@ namespace
516528

517529
cudaStream_t stream = StreamAccessor::getStream(_stream);
518530

519-
cufftHandle planR2C, planC2R;
520-
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
521-
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
522-
523531
cufftSafeCall( cufftSetStream(planR2C, stream) );
524532
cufftSafeCall( cufftSetStream(planC2R, stream) );
525533

@@ -559,9 +567,6 @@ namespace
559567
}
560568
}
561569

562-
cufftSafeCall( cufftDestroy(planR2C) );
563-
cufftSafeCall( cufftDestroy(planC2R) );
564-
565570
syncOutput(result, _result, _stream);
566571
}
567572
}

0 commit comments

Comments
 (0)