@@ -132,45 +132,71 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
132132// //////////////////////////////////////////////////////////////////////
133133// meanStdDev
134134
135- void cv::cuda::meanStdDev (InputArray _src , OutputArray _dst , Stream& stream)
135+ void cv::cuda::meanStdDev (InputArray src , OutputArray dst , Stream& stream)
136136{
137137 if (!deviceSupports (FEATURE_SET_COMPUTE_13))
138138 CV_Error (cv::Error::StsNotImplemented, " Not sufficient compute capebility" );
139139
140- const GpuMat src = getInputMat (_src , stream);
140+ const GpuMat gsrc = getInputMat (src , stream);
141141
142- CV_Assert ( src.type () == CV_8UC1 );
142+ #if (CUDA_VERSION <= 4020)
143+ CV_Assert ( gsrc.type () == CV_8UC1 );
144+ #else
145+ CV_Assert ( (gsrc.type () == CV_8UC1) || (gsrc.type () == CV_32FC1) );
146+ #endif
143147
144- GpuMat dst = getOutputMat (_dst , 1 , 2 , CV_64FC1, stream);
148+ GpuMat gdst = getOutputMat (dst , 1 , 2 , CV_64FC1, stream);
145149
146150 NppiSize sz;
147- sz.width = src .cols ;
148- sz.height = src .rows ;
151+ sz.width = gsrc .cols ;
152+ sz.height = gsrc .rows ;
149153
150154 int bufSize;
151155#if (CUDA_VERSION <= 4020)
152156 nppSafeCall ( nppiMeanStdDev8uC1RGetBufferHostSize (sz, &bufSize) );
153157#else
154- nppSafeCall ( nppiMeanStdDevGetBufferHostSize_8u_C1R (sz, &bufSize) );
158+ if (gsrc.type () == CV_8UC1)
159+ nppSafeCall ( nppiMeanStdDevGetBufferHostSize_8u_C1R (sz, &bufSize) );
160+ else
161+ nppSafeCall ( nppiMeanStdDevGetBufferHostSize_32f_C1R (sz, &bufSize) );
155162#endif
156163
157164 BufferPool pool (stream);
158- GpuMat buf = pool.getBuffer (1 , bufSize, CV_8UC1 );
165+ GpuMat buf = pool.getBuffer (1 , bufSize, gsrc. type () );
159166
160167 // detail: https://github.com/opencv/opencv/issues/11063
161168 // NppStreamHandler h(StreamAccessor::getStream(stream));
162169
163- nppSafeCall ( nppiMean_StdDev_8u_C1R (src.ptr <Npp8u>(), static_cast <int >(src.step ), sz, buf.ptr <Npp8u>(), dst.ptr <Npp64f>(), dst.ptr <Npp64f>() + 1 ) );
170+ if (gsrc.type () == CV_8UC1)
171+ nppSafeCall ( nppiMean_StdDev_8u_C1R (gsrc.ptr <Npp8u>(), static_cast <int >(gsrc.step ), sz, buf.ptr <Npp8u>(), gdst.ptr <Npp64f>(), gdst.ptr <Npp64f>() + 1 ) );
172+ else
173+ nppSafeCall ( nppiMean_StdDev_32f_C1R (gsrc.ptr <Npp32f>(), static_cast <int >(gsrc.step ), sz, buf.ptr <Npp8u>(), gdst.ptr <Npp64f>(), gdst.ptr <Npp64f>() + 1 ) );
174+
175+ syncOutput (gdst, dst, stream);
176+ }
177+
178+ void cv::cuda::meanStdDev (InputArray src, Scalar& mean, Scalar& stddev)
179+ {
180+ Stream& stream = Stream::Null ();
181+
182+ HostMem dst;
183+ meanStdDev (src, dst, stream);
184+
185+ stream.waitForCompletion ();
186+
187+ double vals[2 ];
188+ dst.createMatHeader ().copyTo (Mat (1 , 2 , CV_64FC1, &vals[0 ]));
164189
165- syncOutput (dst, _dst, stream);
190+ mean = Scalar (vals[0 ]);
191+ stddev = Scalar (vals[1 ]);
166192}
167193
168- void cv::cuda::meanStdDev (InputArray _src, Scalar& mean, Scalar& stddev)
194+ void cv::cuda::meanStdDev (InputArray _src, InputArray _mask, Scalar& mean, Scalar& stddev)
169195{
170196 Stream& stream = Stream::Null ();
171197
172198 HostMem dst;
173- meanStdDev (_src, dst, stream);
199+ meanStdDev (_src, _mask, dst, stream);
174200
175201 stream.waitForCompletion ();
176202
@@ -181,6 +207,49 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
181207 stddev = Scalar (vals[1 ]);
182208}
183209
210+ void cv::cuda::meanStdDev (InputArray src, InputArray mask, OutputArray dst, Stream& stream)
211+ {
212+ if (!deviceSupports (FEATURE_SET_COMPUTE_13))
213+ CV_Error (cv::Error::StsNotImplemented, " Not sufficient compute capebility" );
214+
215+ const GpuMat gsrc = getInputMat (src, stream);
216+ const GpuMat gmask = getInputMat (mask, stream);
217+
218+ #if (CUDA_VERSION <= 4020)
219+ CV_Assert ( gsrc.type () == CV_8UC1 );
220+ #else
221+ CV_Assert ( (gsrc.type () == CV_8UC1) || (gsrc.type () == CV_32FC1) );
222+ #endif
223+
224+ GpuMat gdst = getOutputMat (dst, 1 , 2 , CV_64FC1, stream);
225+
226+ NppiSize sz;
227+ sz.width = gsrc.cols ;
228+ sz.height = gsrc.rows ;
229+
230+ int bufSize;
231+ #if (CUDA_VERSION <= 4020)
232+ nppSafeCall ( nppiMeanStdDev8uC1MRGetBufferHostSize (sz, &bufSize) );
233+ #else
234+ if (gsrc.type () == CV_8UC1)
235+ nppSafeCall ( nppiMeanStdDevGetBufferHostSize_8u_C1MR (sz, &bufSize) );
236+ else
237+ nppSafeCall ( nppiMeanStdDevGetBufferHostSize_32f_C1MR (sz, &bufSize) );
238+ #endif
239+
240+ BufferPool pool (stream);
241+ GpuMat buf = pool.getBuffer (1 , bufSize, gsrc.type ());
242+
243+ if (gsrc.type () == CV_8UC1)
244+ nppSafeCall ( nppiMean_StdDev_8u_C1MR (gsrc.ptr <Npp8u>(), static_cast <int >(gsrc.step ), gmask.ptr <Npp8u>(), static_cast <int >(gmask.step ),
245+ sz, buf.ptr <Npp8u>(), gdst.ptr <Npp64f>(), gdst.ptr <Npp64f>() + 1 ) );
246+ else
247+ nppSafeCall ( nppiMean_StdDev_32f_C1MR (gsrc.ptr <Npp32f>(), static_cast <int >(gsrc.step ), gmask.ptr <Npp8u>(), static_cast <int >(gmask.step ),
248+ sz, buf.ptr <Npp8u>(), gdst.ptr <Npp64f>(), gdst.ptr <Npp64f>() + 1 ) );
249+
250+ syncOutput (gdst, dst, stream);
251+ }
252+
184253// ////////////////////////////////////////////////////////////////////////////
185254// rectStdDev
186255
0 commit comments