4545using namespace cv ;
4646using namespace cv ::cuda;
4747
// Build-time switch between the classic NPP entry points and their stream-context ("_Ctx")
// counterparts. It is always a literal 0 or 1, so it is safe inside any #if expression
// (including negation). The #ifndef guard lets a build force a value, e.g.
// -DUSE_NPP_STREAM_CONTEXT=0, instead of redefining the macro in this file.
// The threshold is written as major * 1000 + minor * 100 + build.
#ifndef USE_NPP_STREAM_CONTEXT
#  if defined(NPP_VERSION) && (NPP_VERSION >= (10 * 1000 + 1 * 100 + 0))
#    define USE_NPP_STREAM_CONTEXT 1
#  else
#    define USE_NPP_STREAM_CONTEXT 0
#  endif
#endif

// Token appended to NPP function names by TRY_CTX. Despite the "PREFIX" in its name it is
// pasted after the function name; the name is kept so existing uses keep working.
#if USE_NPP_STREAM_CONTEXT
#  define CTX_PREFIX _Ctx
#else
#  define CTX_PREFIX
#endif

// Two-level token pasting: PPCAT macro-expands its arguments first, then PPCAT_NX glues them.
// There must be no whitespace between a macro name and '(' in these definitions, otherwise
// the macro becomes object-like and nothing is pasted.
#define PPCAT_NX(A, B) A ## B
#define PPCAT(A, B) PPCAT_NX(A, B)

// TRY_CTX(nppiFoo) yields nppiFoo_Ctx when stream contexts are enabled and nppiFoo otherwise.
#define TRY_CTX(func) PPCAT(func, CTX_PREFIX)
4858#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
4959
5060void cv::cuda::warpAffine (InputArray, OutputArray, InputArray, Size, int , int , Scalar, Stream&) { throw_no_cuda (); }
@@ -135,13 +145,16 @@ void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize,
135145
136146namespace
137147{
138- template <int DEPTH> struct NppWarpFunc
148+ template <int DEPTH>
149+ struct NppWarpFunc
139150 {
140151 typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
141-
142- typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
143- int dstStep, NppiRect dstRoi, const double coeffs[][3 ],
144- int interpolation);
152+ #if USE_NPP_STREAM_CONTEXT
153+ typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3 ], int interpolation,
154+ NppStreamContext stream_ctx);
155+ #else
156+ typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3 ], int interpolation);
157+ #endif
145158 };
146159
147160 template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
@@ -168,11 +181,15 @@ namespace
168181 dstroi.height = dst.rows ;
169182 dstroi.width = dst.cols ;
170183
171- cv::cuda::NppStreamHandler h (stream);
172-
173- nppSafeCall ( func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi,
174- dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi,
175- coeffs, npp_inter[interpolation]) );
184+ #if USE_NPP_STREAM_CONTEXT
185+ NppStreamContext nppStreamContext{};
186+ nppSafeCall (nppGetStreamContext (&nppStreamContext));
187+ nppStreamContext.hStream = stream;
188+ nppSafeCall (func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi, dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi, coeffs, npp_inter[interpolation], nppStreamContext));
189+ #else
190+ NppStreamHandler h (stream);
191+ nppSafeCall (func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi, dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi, coeffs, npp_inter[interpolation]));
192+ #endif
176193
177194 if (stream == 0 )
178195 cudaSafeCall ( cudaDeviceSynchronize () );
@@ -251,20 +268,20 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
251268 static const func_t funcs[2 ][6 ][4 ] =
252269 {
253270 {
254- {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
271+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C4R) >::call},
255272 {0 , 0 , 0 , 0 },
256- {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
273+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C4R) >::call},
257274 {0 , 0 , 0 , 0 },
258- {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
259- {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
275+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C4R) >::call},
276+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C4R) >::call}
260277 },
261278 {
262- {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
279+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C4R) >::call},
263280 {0 , 0 , 0 , 0 },
264- {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
281+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C4R) >::call},
265282 {0 , 0 , 0 , 0 },
266- {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
267- {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
283+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C4R) >::call},
284+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C4R) >::call}
268285 }
269286 };
270287
@@ -390,20 +407,20 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
390407 static const func_t funcs[2 ][6 ][4 ] =
391408 {
392409 {
393- {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
410+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C4R) >::call},
394411 {0 , 0 , 0 , 0 },
395- {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
412+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C4R) >::call},
396413 {0 , 0 , 0 , 0 },
397- {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
398- {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
414+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C4R) >::call},
415+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C4R) >::call}
399416 },
400417 {
401- {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
418+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C4R) >::call},
402419 {0 , 0 , 0 , 0 },
403- {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
420+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C4R) >::call},
404421 {0 , 0 , 0 , 0 },
405- {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
406- {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
422+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C4R) >::call},
423+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C4R) >::call}
407424 }
408425 };
409426
0 commit comments