Skip to content

Commit e380e38

Browse files
committed
Added 32FC1 type support and mask to cuda::meanStdDev implementation.
1 parent d4719b2 commit e380e38

File tree

3 files changed

+168
-21
lines changed

3 files changed

+168
-21
lines changed

modules/cudaarithm/include/opencv2/cudaarithm.hpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -685,15 +685,33 @@ CV_EXPORTS_W void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp,
685685

686686
/** @brief Computes a mean value and a standard deviation of matrix elements.
687687
688-
@param mtx Source matrix. CV_8UC1 matrices are supported for now.
689-
@param mean Mean value.
690-
@param stddev Standard deviation value.
688+
@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
689+
@param mask Operation mask.
690+
@param dst Destination buffer for mean and stddev.
691+
@param stream CUDA stream for execution.
691692
692693
@sa meanStdDev
693694
*/
694-
CV_EXPORTS_W void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
695-
/** @overload */
695+
CV_EXPORTS_W void meanStdDev(InputArray src, InputArray mask, OutputArray dst, Stream& stream = Stream::Null());
696+
/** @overload
697+
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
698+
@param dst Destination buffer for mean and stddev.
699+
@param stream CUDA stream for execution.
700+
*/
696701
CV_EXPORTS_W void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
702+
/** @overload
703+
@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
704+
@param mask Operation mask.
705+
@param mean Mean value.
706+
@param stddev Standard deviation value.
707+
*/
708+
CV_EXPORTS_W void meanStdDev(InputArray src, InputArray mask, Scalar& mean, Scalar& stddev);
709+
/** @overload
710+
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
711+
@param mean Mean value.
712+
@param stddev Standard deviation value.
713+
*/
714+
CV_EXPORTS_W void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
697715

698716
/** @brief Computes a standard deviation of integral images.
699717

modules/cudaarithm/src/reductions.cpp

Lines changed: 81 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -132,45 +132,71 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
132132
////////////////////////////////////////////////////////////////////////
133133
// meanStdDev
134134

135-
void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
135+
void cv::cuda::meanStdDev(InputArray src, OutputArray dst, Stream& stream)
136136
{
137137
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
138138
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
139139

140-
const GpuMat src = getInputMat(_src, stream);
140+
const GpuMat gsrc = getInputMat(src, stream);
141141

142-
CV_Assert( src.type() == CV_8UC1 );
142+
#if (CUDA_VERSION <= 4020)
143+
CV_Assert( gsrc.type() == CV_8UC1 );
144+
#else
145+
CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
146+
#endif
143147

144-
GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);
148+
GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);
145149

146150
NppiSize sz;
147-
sz.width = src.cols;
148-
sz.height = src.rows;
151+
sz.width = gsrc.cols;
152+
sz.height = gsrc.rows;
149153

150154
int bufSize;
151155
#if (CUDA_VERSION <= 4020)
152156
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
153157
#else
154-
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
158+
if (gsrc.type() == CV_8UC1)
159+
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
160+
else
161+
nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1R(sz, &bufSize) );
155162
#endif
156163

157164
BufferPool pool(stream);
158-
GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);
165+
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());
159166

160167
// detail: https://github.com/opencv/opencv/issues/11063
161168
//NppStreamHandler h(StreamAccessor::getStream(stream));
162169

163-
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );
170+
if(gsrc.type() == CV_8UC1)
171+
nppSafeCall( nppiMean_StdDev_8u_C1R(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
172+
else
173+
nppSafeCall( nppiMean_StdDev_32f_C1R(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step), sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
174+
175+
syncOutput(gdst, dst, stream);
176+
}
177+
178+
void cv::cuda::meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
179+
{
180+
Stream& stream = Stream::Null();
181+
182+
HostMem dst;
183+
meanStdDev(src, dst, stream);
184+
185+
stream.waitForCompletion();
186+
187+
double vals[2];
188+
dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));
164189

165-
syncOutput(dst, _dst, stream);
190+
mean = Scalar(vals[0]);
191+
stddev = Scalar(vals[1]);
166192
}
167193

168-
void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
194+
void cv::cuda::meanStdDev(InputArray _src, InputArray _mask, Scalar& mean, Scalar& stddev)
169195
{
170196
Stream& stream = Stream::Null();
171197

172198
HostMem dst;
173-
meanStdDev(_src, dst, stream);
199+
meanStdDev(_src, _mask, dst, stream);
174200

175201
stream.waitForCompletion();
176202

@@ -181,6 +207,49 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
181207
stddev = Scalar(vals[1]);
182208
}
183209

210+
void cv::cuda::meanStdDev(InputArray src, InputArray mask, OutputArray dst, Stream& stream)
211+
{
212+
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
213+
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
214+
215+
const GpuMat gsrc = getInputMat(src, stream);
216+
const GpuMat gmask = getInputMat(mask, stream);
217+
218+
#if (CUDA_VERSION <= 4020)
219+
CV_Assert( gsrc.type() == CV_8UC1 );
220+
#else
221+
CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
222+
#endif
223+
224+
GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);
225+
226+
NppiSize sz;
227+
sz.width = gsrc.cols;
228+
sz.height = gsrc.rows;
229+
230+
int bufSize;
231+
#if (CUDA_VERSION <= 4020)
232+
nppSafeCall( nppiMeanStdDev8uC1MRGetBufferHostSize(sz, &bufSize) );
233+
#else
234+
if (gsrc.type() == CV_8UC1)
235+
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1MR(sz, &bufSize) );
236+
else
237+
nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1MR(sz, &bufSize) );
238+
#endif
239+
240+
BufferPool pool(stream);
241+
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());
242+
243+
if(gsrc.type() == CV_8UC1)
244+
nppSafeCall( nppiMean_StdDev_8u_C1MR(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
245+
sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
246+
else
247+
nppSafeCall( nppiMean_StdDev_32f_C1MR(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step), gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
248+
sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
249+
250+
syncOutput(gdst, dst, stream);
251+
}
252+
184253
//////////////////////////////////////////////////////////////////////////////
185254
// rectStdDev
186255

modules/cudaarithm/test/test_reductions.cpp

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -967,25 +967,27 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
967967
////////////////////////////////////////////////////////////////////////////////
968968
// MeanStdDev
969969

970-
PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi)
970+
PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi, MatDepth)
971971
{
972972
cv::cuda::DeviceInfo devInfo;
973973
cv::Size size;
974974
bool useRoi;
975+
int MatDepth;
975976

976977
virtual void SetUp()
977978
{
978979
devInfo = GET_PARAM(0);
979980
size = GET_PARAM(1);
980981
useRoi = GET_PARAM(2);
982+
MatDepth = GET_PARAM(3);
981983

982984
cv::cuda::setDevice(devInfo.deviceID());
983985
}
984986
};
985987

986988
CUDA_TEST_P(MeanStdDev, Accuracy)
987989
{
988-
cv::Mat src = randomMat(size, CV_8UC1);
990+
cv::Mat src = randomMat(size, MatDepth);
989991

990992
if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
991993
{
@@ -1015,9 +1017,42 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
10151017
}
10161018
}
10171019

1020+
CUDA_TEST_P(MeanStdDev, MaskedAccuracy)
1021+
{
1022+
cv::Mat src = randomMat(size, MatDepth);
1023+
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
1024+
1025+
if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
1026+
{
1027+
try
1028+
{
1029+
cv::Scalar mean;
1030+
cv::Scalar stddev;
1031+
cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev);
1032+
}
1033+
catch (const cv::Exception& e)
1034+
{
1035+
ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
1036+
}
1037+
}
1038+
else
1039+
{
1040+
cv::Scalar mean;
1041+
cv::Scalar stddev;
1042+
cv::cuda::meanStdDev(loadMat(src, useRoi), loadMat(mask), mean, stddev);
1043+
1044+
cv::Scalar mean_gold;
1045+
cv::Scalar stddev_gold;
1046+
cv::meanStdDev(src, mean_gold, stddev_gold, mask);
1047+
1048+
EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
1049+
EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
1050+
}
1051+
}
1052+
10181053
CUDA_TEST_P(MeanStdDev, Async)
10191054
{
1020-
cv::Mat src = randomMat(size, CV_8UC1);
1055+
cv::Mat src = randomMat(size, MatDepth);
10211056

10221057
cv::cuda::Stream stream;
10231058

@@ -1037,10 +1072,35 @@ CUDA_TEST_P(MeanStdDev, Async)
10371072
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
10381073
}
10391074

1075+
CUDA_TEST_P(MeanStdDev, MaskedAsync)
1076+
{
1077+
cv::Mat src = randomMat(size, MatDepth);
1078+
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
1079+
1080+
cv::cuda::Stream stream;
1081+
1082+
cv::cuda::HostMem dst;
1083+
cv::cuda::meanStdDev(loadMat(src, useRoi), loadMat(mask), dst, stream);
1084+
1085+
stream.waitForCompletion();
1086+
1087+
double vals[2];
1088+
dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));
1089+
1090+
cv::Scalar mean_gold;
1091+
cv::Scalar stddev_gold;
1092+
cv::meanStdDev(src, mean_gold, stddev_gold, mask);
1093+
1094+
EXPECT_SCALAR_NEAR(mean_gold, cv::Scalar(vals[0]), 1e-5);
1095+
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
1096+
}
1097+
10401098
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
10411099
ALL_DEVICES,
10421100
DIFFERENT_SIZES,
1043-
WHOLE_SUBMAT));
1101+
WHOLE_SUBMAT,
1102+
testing::Values(MatDepth(CV_8U), MatDepth(CV_32F))
1103+
));
10441104

10451105
///////////////////////////////////////////////////////////////////////////////////////////////////////
10461106
// Integral

0 commit comments

Comments
 (0)