30 changes: 24 additions & 6 deletions modules/cudaarithm/include/opencv2/cudaarithm.hpp
@@ -685,21 +685,39 @@ CV_EXPORTS_W void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp,

/** @brief Computes a mean value and a standard deviation of matrix elements.

@param mtx Source matrix. CV_8UC1 matrices are supported for now.
@param mean Mean value.
@param stddev Standard deviation value.
@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param dst Target GpuMat with size 1x2 and type CV_64FC1. The first value is the mean, the second is the stddev.
@param mask Operation mask.
@param stream Stream for the asynchronous version.

@sa meanStdDev
*/
CV_EXPORTS_W void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
/** @overload */
CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stream& stream = Stream::Null());
/** @overload
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param dst Target GpuMat with size 1x2 and type CV_64FC1. The first value is the mean, the second is the stddev.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS_W void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
/** @overload
@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param mean Mean value.
@param stddev Standard deviation value.
@param mask Operation mask.
*/
CV_EXPORTS_W void meanStdDev(InputArray src, CV_OUT Scalar& mean, CV_OUT Scalar& stddev, InputArray mask);
/** @overload
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param mean Mean value.
@param stddev Standard deviation value.
*/
CV_EXPORTS_W void meanStdDev(InputArray mtx, CV_OUT Scalar& mean, CV_OUT Scalar& stddev);

/** @brief Computes a standard deviation of integral images.

@param src Source image. Only the CV_32SC1 type is supported.
@param sqr Squared source image. Only the CV_32FC1 type is supported.
@param dst Destination image with the same type and size as src .
@param dst Destination image with the same type and size as src.
@param rect Rectangular window.
@param stream Stream for the asynchronous version.
*/
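For reference, a minimal usage sketch of the new synchronous masked overload declared above (assumes an OpenCV build with the cudaarithm module; the image size, mask construction, and variable names are illustrative):

    #include <iostream>
    #include <opencv2/core.hpp>
    #include <opencv2/cudaarithm.hpp>

    int main()
    {
        // Illustrative input: an 8-bit single-channel image and a binary mask.
        // The overloads above accept CV_8UC1 or CV_32FC1 sources.
        cv::Mat hostSrc(480, 640, CV_8UC1);
        cv::randu(hostSrc, cv::Scalar(0), cv::Scalar(256));
        cv::Mat hostMask = hostSrc > 128;      // CV_8UC1; non-zero pixels are counted

        cv::cuda::GpuMat src, mask;
        src.upload(hostSrc);
        mask.upload(hostMask);

        // Synchronous masked overload: blocks until the result is available.
        cv::Scalar mean, stddev;
        cv::cuda::meanStdDev(src, mean, stddev, mask);

        std::cout << "mean=" << mean[0] << " stddev=" << stddev[0] << std::endl;
        return 0;
    }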
93 changes: 81 additions & 12 deletions modules/cudaarithm/src/reductions.cpp
@@ -132,45 +132,71 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
////////////////////////////////////////////////////////////////////////
// meanStdDev

void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
void cv::cuda::meanStdDev(InputArray src, OutputArray dst, Stream& stream)
{
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capability");

const GpuMat src = getInputMat(_src, stream);
const GpuMat gsrc = getInputMat(src, stream);

CV_Assert( src.type() == CV_8UC1 );
#if (CUDA_VERSION <= 4020)
CV_Assert( gsrc.type() == CV_8UC1 );
#else
CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
#endif

GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);
GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);

NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
sz.width = gsrc.cols;
sz.height = gsrc.rows;

int bufSize;
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
#else
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
if (gsrc.type() == CV_8UC1)
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
else
nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1R(sz, &bufSize) );
#endif

BufferPool pool(stream);
GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());

// detail: https://github.com/opencv/opencv/issues/11063
//NppStreamHandler h(StreamAccessor::getStream(stream));

nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );
if(gsrc.type() == CV_8UC1)
nppSafeCall( nppiMean_StdDev_8u_C1R(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
else
nppSafeCall( nppiMean_StdDev_32f_C1R(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step), sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );

syncOutput(gdst, dst, stream);
}

void cv::cuda::meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
{
Stream& stream = Stream::Null();

HostMem dst;
meanStdDev(src, dst, stream);

stream.waitForCompletion();

double vals[2];
dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));

syncOutput(dst, _dst, stream);
mean = Scalar(vals[0]);
stddev = Scalar(vals[1]);
}

void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, InputArray _mask)
{
Stream& stream = Stream::Null();

HostMem dst;
meanStdDev(_src, dst, stream);
meanStdDev(_src, dst, _mask, stream);

stream.waitForCompletion();

@@ -181,6 +207,49 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
stddev = Scalar(vals[1]);
}

void cv::cuda::meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stream& stream)
{
if (!deviceSupports(FEATURE_SET_COMPUTE_13))
CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capability");

const GpuMat gsrc = getInputMat(src, stream);
const GpuMat gmask = getInputMat(mask, stream);

#if (CUDA_VERSION <= 4020)
CV_Assert( gsrc.type() == CV_8UC1 );
#else
CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
#endif

GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);

NppiSize sz;
sz.width = gsrc.cols;
sz.height = gsrc.rows;

int bufSize;
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiMeanStdDev8uC1MRGetBufferHostSize(sz, &bufSize) );
#else
if (gsrc.type() == CV_8UC1)
nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1MR(sz, &bufSize) );
else
nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1MR(sz, &bufSize) );
#endif

BufferPool pool(stream);
GpuMat buf = pool.getBuffer(1, bufSize, gsrc.type());

if(gsrc.type() == CV_8UC1)
nppSafeCall( nppiMean_StdDev_8u_C1MR(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
else
nppSafeCall( nppiMean_StdDev_32f_C1MR(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step), gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );

syncOutput(gdst, dst, stream);
}

//////////////////////////////////////////////////////////////////////////////
// rectStdDev

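For the asynchronous path implemented above, a hedged sketch of how a caller might read back the 1x2 CV_64FC1 result ([mean, stddev]) through page-locked HostMem, mirroring the pattern used in the tests below (the helper name is illustrative):

    #include <opencv2/core.hpp>
    #include <opencv2/cudaarithm.hpp>

    // Enqueues a masked mean/stddev reduction and waits for the result.
    void asyncMaskedMeanStdDev(const cv::cuda::GpuMat& src, const cv::cuda::GpuMat& mask,
                               double& mean, double& stddev)
    {
        cv::cuda::Stream stream;
        cv::cuda::HostMem result;                        // receives a 1x2 CV_64FC1 row

        cv::cuda::meanStdDev(src, result, mask, stream); // enqueued on the stream
        stream.waitForCompletion();                      // result is valid only after this

        double vals[2];
        result.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, vals));
        mean   = vals[0];                                // first value: mean
        stddev = vals[1];                                // second value: stddev
    }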
68 changes: 64 additions & 4 deletions modules/cudaarithm/test/test_reductions.cpp
@@ -967,25 +967,27 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
////////////////////////////////////////////////////////////////////////////////
// MeanStdDev

PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi)
PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi, MatDepth)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
bool useRoi;
int MatDepth;

virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
useRoi = GET_PARAM(2);
MatDepth = GET_PARAM(3);

cv::cuda::setDevice(devInfo.deviceID());
}
};

CUDA_TEST_P(MeanStdDev, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Mat src = randomMat(size, MatDepth);

if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
{
@@ -1015,9 +1017,42 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
}
}

CUDA_TEST_P(MeanStdDev, MaskedAccuracy)
{
cv::Mat src = randomMat(size, MatDepth);
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);

if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
{
try
{
cv::Scalar mean;
cv::Scalar stddev;
cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
}
}
else
{
cv::Scalar mean;
cv::Scalar stddev;
cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev, loadMat(mask));

cv::Scalar mean_gold;
cv::Scalar stddev_gold;
cv::meanStdDev(src, mean_gold, stddev_gold, mask);

EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
}
}

CUDA_TEST_P(MeanStdDev, Async)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Mat src = randomMat(size, MatDepth);

cv::cuda::Stream stream;

@@ -1037,10 +1072,35 @@ CUDA_TEST_P(MeanStdDev, Async)
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
}

CUDA_TEST_P(MeanStdDev, MaskedAsync)
{
cv::Mat src = randomMat(size, MatDepth);
cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);

cv::cuda::Stream stream;

cv::cuda::HostMem dst;
cv::cuda::meanStdDev(loadMat(src, useRoi), dst, loadMat(mask), stream);

stream.waitForCompletion();

double vals[2];
dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));

cv::Scalar mean_gold;
cv::Scalar stddev_gold;
cv::meanStdDev(src, mean_gold, stddev_gold, mask);

EXPECT_SCALAR_NEAR(mean_gold, cv::Scalar(vals[0]), 1e-5);
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
}

INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
WHOLE_SUBMAT));
WHOLE_SUBMAT,
testing::Values(MatDepth(CV_8U), MatDepth(CV_32F))
));

///////////////////////////////////////////////////////////////////////////////////////////////////////
// Integral