@@ -53,55 +53,75 @@ namespace cv { namespace cuda { namespace device
5353{
5454 namespace imgproc
5555 {
56- __constant__ float c_warpMat[3 * 3 ];
57-
5856 struct AffineTransform
5957 {
60- static __device__ __forceinline__ float2 calcCoord (int x, int y)
58+ static const int rows = 2 ;
59+ static __device__ __forceinline__ float2 calcCoord (const float warpMat[AffineTransform::rows * 3 ], int x, int y)
6160 {
62- const float xcoo = c_warpMat [0 ] * x + c_warpMat [1 ] * y + c_warpMat [2 ];
63- const float ycoo = c_warpMat [3 ] * x + c_warpMat [4 ] * y + c_warpMat [5 ];
61+ const float xcoo = warpMat [0 ] * x + warpMat [1 ] * y + warpMat [2 ];
62+ const float ycoo = warpMat [3 ] * x + warpMat [4 ] * y + warpMat [5 ];
6463
6564 return make_float2 (xcoo, ycoo);
6665 }
66+
67+ struct Coefficients
68+ {
69+ Coefficients (const float * c_)
70+ {
71+ for (int i = 0 ; i < AffineTransform::rows * 3 ; i++)
72+ c[i] = c_[i];
73+ }
74+ float c[AffineTransform::rows * 3 ];
75+ };
6776 };
6877
6978 struct PerspectiveTransform
7079 {
71- static __device__ __forceinline__ float2 calcCoord (int x, int y)
80+ static const int rows = 3 ;
81+ static __device__ __forceinline__ float2 calcCoord (const float warpMat[PerspectiveTransform::rows * 3 ], int x, int y)
7282 {
73- const float coeff = 1 .0f / (c_warpMat [6 ] * x + c_warpMat [7 ] * y + c_warpMat [8 ]);
83+ const float coeff = 1 .0f / (warpMat [6 ] * x + warpMat [7 ] * y + warpMat [8 ]);
7484
75- const float xcoo = coeff * (c_warpMat [0 ] * x + c_warpMat [1 ] * y + c_warpMat [2 ]);
76- const float ycoo = coeff * (c_warpMat [3 ] * x + c_warpMat [4 ] * y + c_warpMat [5 ]);
85+ const float xcoo = coeff * (warpMat [0 ] * x + warpMat [1 ] * y + warpMat [2 ]);
86+ const float ycoo = coeff * (warpMat [3 ] * x + warpMat [4 ] * y + warpMat [5 ]);
7787
7888 return make_float2 (xcoo, ycoo);
7989 }
90+ struct Coefficients
91+ {
92+ Coefficients (const float * c_)
93+ {
94+ for (int i = 0 ; i < PerspectiveTransform::rows * 3 ; i++)
95+ c[i] = c_[i];
96+ }
97+
98+ float c[PerspectiveTransform::rows * 3 ];
99+ };
80100 };
81101
82102 // /////////////////////////////////////////////////////////////////
83103 // Build Maps
84104
85- template <class Transform > __global__ void buildWarpMaps (PtrStepSzf xmap, PtrStepf ymap)
105+ template <class Transform > __global__ void buildWarpMaps (PtrStepSzf xmap, PtrStepf ymap, const typename Transform::Coefficients warpMat )
86106 {
87107 const int x = blockDim .x * blockIdx .x + threadIdx .x ;
88108 const int y = blockDim .y * blockIdx .y + threadIdx .y ;
89109
90110 if (x < xmap.cols && y < xmap.rows )
91111 {
92- const float2 coord = Transform::calcCoord (x, y);
112+ const float2 coord = Transform::calcCoord (warpMat. c , x, y);
93113
94114 xmap (y, x) = coord.x ;
95115 ymap (y, x) = coord.y ;
96116 }
97117 }
98118
99- template <class Transform > void buildWarpMaps_caller (PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
119+ template <class Transform > void buildWarpMaps_caller (PtrStepSzf xmap, PtrStepSzf ymap, const float warpMat[Transform::rows * 3 ], cudaStream_t stream)
100120 {
101121 dim3 block (32 , 8 );
102122 dim3 grid (divUp (xmap.cols , block.x ), divUp (xmap.rows , block.y ));
103123
104- buildWarpMaps<Transform><<<grid, block, 0 , stream>>> (xmap, ymap);
124+ buildWarpMaps<Transform><<<grid, block, 0 , stream>>> (xmap, ymap, warpMat );
105125 cudaSafeCall ( cudaGetLastError () );
106126
107127 if (stream == 0 )
@@ -110,37 +130,33 @@ namespace cv { namespace cuda { namespace device
110130
111131 void buildWarpAffineMaps_gpu (float coeffs[2 * 3 ], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
112132 {
113- cudaSafeCall ( cudaMemcpyToSymbol (c_warpMat, coeffs, 2 * 3 * sizeof (float )) );
114-
115- buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
133+ buildWarpMaps_caller<AffineTransform>(xmap, ymap, coeffs, stream);
116134 }
117135
118136 void buildWarpPerspectiveMaps_gpu (float coeffs[3 * 3 ], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
119137 {
120- cudaSafeCall ( cudaMemcpyToSymbol (c_warpMat, coeffs, 3 * 3 * sizeof (float )) );
121-
122- buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
138+ buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, coeffs, stream);
123139 }
124140
125141 // /////////////////////////////////////////////////////////////////
126142 // Warp
127143
128- template <class Transform , class Ptr2D , typename T> __global__ void warp (const Ptr2D src, PtrStepSz<T> dst)
144+ template <class Transform , class Ptr2D , typename T> __global__ void warp (const Ptr2D src, PtrStepSz<T> dst, const typename Transform::Coefficients warpMat )
129145 {
130146 const int x = blockDim .x * blockIdx .x + threadIdx .x ;
131147 const int y = blockDim .y * blockIdx .y + threadIdx .y ;
132148
133149 if (x < dst.cols && y < dst.rows )
134150 {
135- const float2 coord = Transform::calcCoord (x, y);
151+ const float2 coord = Transform::calcCoord (warpMat. c , x, y);
136152
137153 dst.ptr (y)[x] = saturate_cast<T>(src (coord.y , coord.x ));
138154 }
139155 }
140156
141157 template <class Transform , template <typename > class Filter , template <typename > class B , typename T> struct WarpDispatcherStream
142158 {
143- static void call (PtrStepSz<T> src, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool )
159+ static void call (PtrStepSz<T> src, PtrStepSz<T> dst, const float * borderValue, const float warpMat[Transform::rows* 3 ], cudaStream_t stream, bool )
144160 {
145161 typedef typename TypeVec<float , VecTraits<T>::cn>::vec_type work_type;
146162
@@ -151,14 +167,14 @@ namespace cv { namespace cuda { namespace device
151167 BorderReader< PtrStep<T>, B<work_type> > brdSrc (src, brd);
152168 Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src (brdSrc);
153169
154- warp<Transform><<<grid, block, 0 , stream>>> (filter_src, dst);
170+ warp<Transform><<<grid, block, 0 , stream>>> (filter_src, dst, warpMat );
155171 cudaSafeCall ( cudaGetLastError () );
156172 }
157173 };
158174
159175 template <class Transform , template <typename > class Filter , template <typename > class B , typename T> struct WarpDispatcherNonStream
160176 {
161- static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, bool )
177+ static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, const float warpMat[Transform::rows* 3 ], bool )
162178 {
163179 CV_UNUSED (xoff);
164180 CV_UNUSED (yoff);
@@ -173,7 +189,7 @@ namespace cv { namespace cuda { namespace device
173189 BorderReader< PtrStep<T>, B<work_type> > brdSrc (src, brd);
174190 Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src (brdSrc);
175191
176- warp<Transform><<<grid, block>>> (filter_src, dst);
192+ warp<Transform><<<grid, block>>> (filter_src, dst, warpMat );
177193 cudaSafeCall ( cudaGetLastError () );
178194
179195 cudaSafeCall ( cudaDeviceSynchronize () );
@@ -195,7 +211,7 @@ namespace cv { namespace cuda { namespace device
195211 }; \
196212 template <class Transform , template <typename > class Filter , template <typename > class B > struct WarpDispatcherNonStream <Transform, Filter, B, type> \
197213 { \
198- static void call (PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float * borderValue, bool cc20) \
214+ static void call (PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float * borderValue, const float warpMat[Transform::rows* 3 ], bool cc20) \
199215 { \
200216 typedef typename TypeVec<float , VecTraits< type >::cn>::vec_type work_type; \
201217 dim3 block (32 , cc20 ? 8 : 4 ); \
@@ -205,14 +221,14 @@ namespace cv { namespace cuda { namespace device
205221 B<work_type> brd (src.rows , src.cols , VecTraits<work_type>::make (borderValue)); \
206222 BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc (texSrc, brd); \
207223 Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src (brdSrc); \
208- warp<Transform><<<grid, block>>> (filter_src, dst); \
224+ warp<Transform><<<grid, block>>> (filter_src, dst, warpMat ); \
209225 cudaSafeCall ( cudaGetLastError () ); \
210226 cudaSafeCall ( cudaDeviceSynchronize () ); \
211227 } \
212228 }; \
213229 template <class Transform , template <typename > class Filter > struct WarpDispatcherNonStream <Transform, Filter, BrdReplicate, type> \
214230 { \
215- static void call (PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float *, bool ) \
231+ static void call (PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float *, const float warpMat[Transform::rows* 3 ], bool ) \
216232 { \
217233 dim3 block (32 , 8 ); \
218234 dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y )); \
@@ -221,14 +237,14 @@ namespace cv { namespace cuda { namespace device
221237 if (srcWhole.cols == src.cols && srcWhole.rows == src.rows ) \
222238 { \
223239 Filter< tex_warp_ ## type ##_reader > filter_src (texSrc); \
224- warp<Transform><<<grid, block>>> (filter_src, dst); \
240+ warp<Transform><<<grid, block>>> (filter_src, dst, warpMat ); \
225241 } \
226242 else \
227243 { \
228244 BrdReplicate<type> brd (src.rows , src.cols ); \
229245 BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc (texSrc, brd); \
230246 Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src (brdSrc); \
231- warp<Transform><<<grid, block>>> (filter_src, dst); \
247+ warp<Transform><<<grid, block>>> (filter_src, dst, warpMat ); \
232248 } \
233249 cudaSafeCall ( cudaGetLastError () ); \
234250 cudaSafeCall ( cudaDeviceSynchronize () ); \
@@ -263,20 +279,20 @@ namespace cv { namespace cuda { namespace device
263279
264280 template <class Transform , template <typename > class Filter , template <typename > class B , typename T> struct WarpDispatcher
265281 {
266- static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20)
282+ static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, const float warpMat[Transform::rows* 3 ], cudaStream_t stream, bool cc20)
267283 {
268284 if (stream == 0 )
269- WarpDispatcherNonStream<Transform, Filter, B, T>::call (src, srcWhole, xoff, yoff, dst, borderValue, cc20);
285+ WarpDispatcherNonStream<Transform, Filter, B, T>::call (src, srcWhole, xoff, yoff, dst, borderValue, warpMat, cc20);
270286 else
271- WarpDispatcherStream<Transform, Filter, B, T>::call (src, dst, borderValue, stream, cc20);
287+ WarpDispatcherStream<Transform, Filter, B, T>::call (src, dst, borderValue, warpMat, stream, cc20);
272288 }
273289 };
274290
275291 template <class Transform , typename T>
276292 void warp_caller (PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
277- int borderMode, const float * borderValue, cudaStream_t stream, bool cc20)
293+ int borderMode, const float * borderValue, const float warpMat[Transform::rows* 3 ], cudaStream_t stream, bool cc20)
278294 {
279- typedef void (*func_t )(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20);
295+ typedef void (*func_t )(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float * borderValue, const float warpMat[Transform::rows* 3 ], cudaStream_t stream, bool cc20);
280296
281297 static const func_t funcs[3 ][5 ] =
282298 {
@@ -304,15 +320,13 @@ namespace cv { namespace cuda { namespace device
304320 };
305321
306322 funcs[interpolation][borderMode](static_cast < PtrStepSz<T> >(src), static_cast < PtrStepSz<T> >(srcWhole), xoff, yoff,
307- static_cast < PtrStepSz<T> >(dst), borderValue, stream, cc20);
323+ static_cast < PtrStepSz<T> >(dst), borderValue, warpMat , stream, cc20);
308324 }
309325
310326 template <typename T> void warpAffine_gpu (PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3 ], PtrStepSzb dst, int interpolation,
311327 int borderMode, const float * borderValue, cudaStream_t stream, bool cc20)
312328 {
313- cudaSafeCall ( cudaMemcpyToSymbol (c_warpMat, coeffs, 2 * 3 * sizeof (float )) );
314-
315- warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
329+ warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, coeffs, stream, cc20);
316330 }
317331
318332 template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3 ], PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
@@ -348,9 +362,7 @@ namespace cv { namespace cuda { namespace device
348362 template <typename T> void warpPerspective_gpu (PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3 ], PtrStepSzb dst, int interpolation,
349363 int borderMode, const float * borderValue, cudaStream_t stream, bool cc20)
350364 {
351- cudaSafeCall ( cudaMemcpyToSymbol (c_warpMat, coeffs, 3 * 3 * sizeof (float )) );
352-
353- warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
365+ warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, coeffs, stream, cc20);
354366 }
355367
356368 template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3 ], PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
0 commit comments