4848#include " opencv2/core/cuda/functional.hpp"
4949#include " opencv2/core/cuda/utility.hpp"
5050#include " opencv2/core/cuda.hpp"
51+ #include < opencv2/cudev/ptr2d/texture.hpp>
5152
5253using namespace cv ::cuda;
5354using namespace cv ::cuda::device;
@@ -90,56 +91,17 @@ namespace cv { namespace cuda { namespace device
9091
9192namespace canny
9293{
93- struct SrcTex
94- {
95- virtual ~SrcTex () {}
96-
97- __host__ SrcTex (int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
98-
99- __device__ __forceinline__ virtual int operator ()(int y, int x) const = 0;
100-
101- int xoff;
102- int yoff;
103- };
104-
105- texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src (false , cudaFilterModePoint, cudaAddressModeClamp);
106- struct SrcTexRef : SrcTex
107- {
108- __host__ SrcTexRef (int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {}
109-
110- __device__ __forceinline__ int operator ()(int y, int x) const override
111- {
112- return tex2D (tex_src, x + xoff, y + yoff);
113- }
114- };
115-
116- struct SrcTexObj : SrcTex
117- {
118- __host__ SrcTexObj (int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { }
119-
120- __device__ __forceinline__ int operator ()(int y, int x) const override
121- {
122- return tex2D <uchar>(tex_src_object, x + xoff, y + yoff);
123- }
124-
125- cudaTextureObject_t tex_src_object;
126- };
127-
128- template <
129- class T ,
130- class Norm ,
131- typename = typename std::enable_if<std::is_base_of<SrcTex, T>::value>::type
132- >
133- __global__ void calcMagnitudeKernel (const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
94+ template <class Norm >
95+ __global__ void calcMagnitudeKernel (cv::cudev::TextureOffPtr<uchar> texSrc, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
13496 {
13597 const int x = blockIdx .x * blockDim .x + threadIdx .x ;
13698 const int y = blockIdx .y * blockDim .y + threadIdx .y ;
13799
138100 if (y >= mag.rows || x >= mag.cols )
139101 return ;
140102
141- int dxVal = (src (y - 1 , x + 1 ) + 2 * src (y, x + 1 ) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2 * src (y, x - 1 ) + src (y + 1 , x - 1 ));
142- int dyVal = (src (y + 1 , x - 1 ) + 2 * src (y + 1 , x) + src (y + 1 , x + 1 )) - (src (y - 1 , x - 1 ) + 2 * src (y - 1 , x) + src (y - 1 , x + 1 ));
103+ int dxVal = (texSrc (y - 1 , x + 1 ) + 2 * texSrc (y, x + 1 ) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2 * texSrc (y, x - 1 ) + texSrc (y + 1 , x - 1 ));
104+ int dyVal = (texSrc (y + 1 , x - 1 ) + 2 * texSrc (y + 1 , x) + texSrc (y + 1 , x + 1 )) - (texSrc (y - 1 , x - 1 ) + 2 * texSrc (y - 1 , x) + texSrc (y - 1 , x + 1 ));
143105
144106 dx (y, x) = dxVal;
145107 dy (y, x) = dyVal;
@@ -151,63 +113,20 @@ namespace canny
151113 {
152114 const dim3 block (16 , 16 );
153115 const dim3 grid (divUp (mag.cols , block.x ), divUp (mag.rows , block.y ));
154-
155- bool cc30 = deviceSupports (FEATURE_SET_COMPUTE_30);
156-
157- if (cc30)
116+ cv::cudev::TextureOff<uchar> texSrc (srcWhole, yoff, xoff);
117+ if (L2Grad)
158118 {
159- cudaTextureDesc texDesc;
160- memset (&texDesc, 0 , sizeof (texDesc));
161- texDesc.addressMode [0 ] = cudaAddressModeClamp;
162- texDesc.addressMode [1 ] = cudaAddressModeClamp;
163- texDesc.addressMode [2 ] = cudaAddressModeClamp;
164-
165- cudaTextureObject_t tex = 0 ;
166- createTextureObjectPitch2D (&tex, srcWhole, texDesc);
167-
168- SrcTexObj src (xoff, yoff, tex);
169-
170- if (L2Grad)
171- {
172- L2 norm;
173- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
174- }
175- else
176- {
177- L1 norm;
178- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
179- }
180-
181- cudaSafeCall ( cudaGetLastError () );
182-
183- if (stream == NULL )
184- cudaSafeCall ( cudaDeviceSynchronize () );
185- else
186- cudaSafeCall ( cudaStreamSynchronize (stream) );
187-
188- cudaSafeCall ( cudaDestroyTextureObject (tex) );
119+ L2 norm;
120+ calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
189121 }
190122 else
191123 {
192- bindTexture (&tex_src, srcWhole);
193- SrcTexRef src (xoff, yoff);
194-
195- if (L2Grad)
196- {
197- L2 norm;
198- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
199- }
200- else
201- {
202- L1 norm;
203- calcMagnitudeKernel<<<grid, block, 0 , stream>>> (src, dx, dy, mag, norm);
204- }
205-
206- cudaSafeCall ( cudaGetLastError () );
207-
208- if (stream == NULL )
209- cudaSafeCall ( cudaDeviceSynchronize () );
124+ L1 norm;
125+ calcMagnitudeKernel << <grid, block, 0 , stream >> > (texSrc, dx, dy, mag, norm);
210126 }
127+
128+ if (stream == NULL )
129+ cudaSafeCall (cudaDeviceSynchronize ());
211130 }
212131
213132 void calcMagnitude (PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
@@ -229,8 +148,7 @@ namespace canny
229148
230149namespace canny
231150{
232- texture<float , cudaTextureType2D, cudaReadModeElementType> tex_mag (false , cudaFilterModePoint, cudaAddressModeClamp);
233- __global__ void calcMapKernel (const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
151+ __global__ void calcMapKernel (cv::cudev::TexturePtr<float > texMag, const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
234152 {
235153 const int CANNY_SHIFT = 15 ;
236154 const int TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
@@ -245,7 +163,7 @@ namespace canny
245163 int dyVal = dy (y, x);
246164
247165 const int s = (dxVal ^ dyVal) < 0 ? -1 : 1 ;
248- const float m = tex2D (tex_mag , x, y );
166+ const float m = texMag (y , x);
249167
250168 dxVal = ::abs (dxVal);
251169 dyVal = ::abs (dyVal);
@@ -264,69 +182,17 @@ namespace canny
264182
265183 if (dyVal < tg22x)
266184 {
267- if (m > tex2D (tex_mag , x - 1 , y ) && m >= tex2D (tex_mag , x + 1 , y ))
185+ if (m > texMag (y , x - 1 ) && m >= texMag (y , x + 1 ))
268186 edge_type = 1 + (int )(m > high_thresh);
269187 }
270188 else if (dyVal > tg67x)
271189 {
272- if (m > tex2D (tex_mag, x, y - 1 ) && m >= tex2D (tex_mag, x, y + 1 ))
190+ if (m > texMag ( y - 1 , x ) && m >= texMag ( y + 1 , x ))
273191 edge_type = 1 + (int )(m > high_thresh);
274192 }
275193 else
276194 {
277- if (m > tex2D (tex_mag, x - s, y - 1 ) && m >= tex2D (tex_mag, x + s, y + 1 ))
278- edge_type = 1 + (int )(m > high_thresh);
279- }
280- }
281-
282- map (y, x) = edge_type;
283- }
284-
285- __global__ void calcMapKernel (const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh, cudaTextureObject_t tex_mag)
286- {
287- const int CANNY_SHIFT = 15 ;
288- const int TG22 = (int )(0.4142135623730950488016887242097 *(1 <<CANNY_SHIFT) + 0.5 );
289-
290- const int x = blockIdx .x * blockDim .x + threadIdx .x ;
291- const int y = blockIdx .y * blockDim .y + threadIdx .y ;
292-
293- if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1 )
294- return ;
295-
296- int dxVal = dx (y, x);
297- int dyVal = dy (y, x);
298-
299- const int s = (dxVal ^ dyVal) < 0 ? -1 : 1 ;
300- const float m = tex2D <float >(tex_mag, x, y);
301-
302- dxVal = ::abs (dxVal);
303- dyVal = ::abs (dyVal);
304-
305- // 0 - the pixel can not belong to an edge
306- // 1 - the pixel might belong to an edge
307- // 2 - the pixel does belong to an edge
308- int edge_type = 0 ;
309-
310- if (m > low_thresh)
311- {
312- const int tg22x = dxVal * TG22;
313- const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
314-
315- dyVal <<= CANNY_SHIFT;
316-
317- if (dyVal < tg22x)
318- {
319- if (m > tex2D <float >(tex_mag, x - 1 , y) && m >= tex2D <float >(tex_mag, x + 1 , y))
320- edge_type = 1 + (int )(m > high_thresh);
321- }
322- else if (dyVal > tg67x)
323- {
324- if (m > tex2D <float >(tex_mag, x, y - 1 ) && m >= tex2D <float >(tex_mag, x, y + 1 ))
325- edge_type = 1 + (int )(m > high_thresh);
326- }
327- else
328- {
329- if (m > tex2D <float >(tex_mag, x - s, y - 1 ) && m >= tex2D <float >(tex_mag, x + s, y + 1 ))
195+ if (m > texMag (y - 1 , x - s) && m >= texMag (y + 1 , x + s))
330196 edge_type = 1 + (int )(m > high_thresh);
331197 }
332198 }
@@ -338,47 +204,10 @@ namespace canny
338204 {
339205 const dim3 block (16 , 16 );
340206 const dim3 grid (divUp (dx.cols , block.x ), divUp (dx.rows , block.y ));
341-
342- if (deviceSupports (FEATURE_SET_COMPUTE_30))
343- {
344- // Use the texture object
345- cudaResourceDesc resDesc;
346- memset (&resDesc, 0 , sizeof (resDesc));
347- resDesc.resType = cudaResourceTypePitch2D;
348- resDesc.res .pitch2D .devPtr = mag.ptr ();
349- resDesc.res .pitch2D .height = mag.rows ;
350- resDesc.res .pitch2D .width = mag.cols ;
351- resDesc.res .pitch2D .pitchInBytes = mag.step ;
352- resDesc.res .pitch2D .desc = cudaCreateChannelDesc<float >();
353-
354- cudaTextureDesc texDesc;
355- memset (&texDesc, 0 , sizeof (texDesc));
356- texDesc.addressMode [0 ] = cudaAddressModeClamp;
357- texDesc.addressMode [1 ] = cudaAddressModeClamp;
358- texDesc.addressMode [2 ] = cudaAddressModeClamp;
359-
360- cudaTextureObject_t tex=0 ;
361- cudaCreateTextureObject (&tex, &resDesc, &texDesc, NULL );
362- calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh, tex);
363- cudaSafeCall ( cudaGetLastError () );
364-
365- if (stream == NULL )
366- cudaSafeCall ( cudaDeviceSynchronize () );
367- else
368- cudaSafeCall ( cudaStreamSynchronize (stream) );
369-
370- cudaSafeCall ( cudaDestroyTextureObject (tex) );
371- }
372- else
373- {
374- // Use the texture reference
375- bindTexture (&tex_mag, mag);
376- calcMapKernel<<<grid, block, 0 , stream>>> (dx, dy, map, low_thresh, high_thresh);
377- cudaSafeCall ( cudaGetLastError () );
378-
379- if (stream == NULL )
380- cudaSafeCall ( cudaDeviceSynchronize () );
381- }
207+ cv::cudev::Texture<float > texMag (mag);
208+ calcMapKernel<<<grid, block, 0 , stream>>> (texMag, dx, dy, map, low_thresh, high_thresh);
209+ if (stream == NULL )
210+ cudaSafeCall ( cudaDeviceSynchronize () );
382211 }
383212}
384213
0 commit comments