Skip to content

Commit 38d5db7

Browse files
committed
Added OCL versions of Sr and Sc functions
1 parent 44e14b3 commit 38d5db7

File tree

6 files changed

+334
-3
lines changed

6 files changed

+334
-3
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
6+
// Third party copyrights are property of their respective owners.
7+
8+
9+
10+
// Normalized cross-correlation (NCC) between `patch` and the stored model
// samples, one work-item per sample.
//
// Work-item layout:
//   global id   0..499 -> positive sample #id,        result in ncc[id]
//   global id 500..999 -> negative sample #(id - 500), result in ncc[id]
//   global id >= 1000  -> returns immediately
// Work-items whose sample index is >= posNum (resp. negNum) write nothing.
//
// patch            225 bytes
// positiveSamples  samples stored back to back, 225 bytes each
// negativeSamples  samples stored back to back, 225 bytes each
// ncc              output buffer, indexed as described above
__kernel void NCC(__global const uchar *patch,
                  __global const uchar *positiveSamples,
                  __global const uchar *negativeSamples,
                  __global float *ncc,
                  int posNum,
                  int negNum)
{
    const int N = 225;      // bytes per sample / patch
    const int HALF = 500;   // output slots reserved for each sample set

    const int gid = get_global_id(0);
    if (gid >= 2 * HALF)
        return;

    const bool isPositive = gid < HALF;
    const int idx = isPositive ? gid : gid - HALF;
    if (idx >= (isPositive ? posNum : negNum))
        return;

    __global const uchar *sample =
        (isPositive ? positiveSamples : negativeSamples) + idx * N;

    // Plain sums, sums of squares and the cross product. All of these fit in
    // a 32-bit int: the largest is 225 * 255 * 255 = 14630625.
    int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
    for (int i = 0; i < N; i++)
    {
        const int a = sample[i];
        const int b = patch[i];
        s1 += a;
        s2 += b;
        n1 += a * a;
        n2 += b * b;
        prod += a * b;
    }

    // The products of two sums can reach 57375 * 57375, which does not fit in
    // a 32-bit int, so the numerators are formed exactly in 64-bit integers
    // and only the final quotient is converted to floating point. This also
    // avoids truncating integer division by N and keeps the kernel free of
    // double-precision arithmetic.
    const long var1 = (long)N * n1 - (long)s1 * s1;
    const long var2 = (long)N * n2 - (long)s2 * s2;
    const long cov = (long)N * prod - (long)s1 * s2;

    const float sq1 = sqrt(max(0.0f, (float)var1 / N));
    const float sq2 = sqrt(max(0.0f, (float)var2 / N));

    // A constant patch (sq2 == 0) yields sq1 / |sq1|, i.e. 1 for a
    // non-constant sample and NaN when the sample is constant as well.
    ncc[gid] = (sq2 == 0) ? sq1 / fabs(sq1) : ((float)cov / N) / sq1 / sq2;
}

modules/tracking/src/precomp.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444

4545
#include "opencv2/tracking.hpp"
4646
#include "opencv2/core/utility.hpp"
47+
#include "opencv2/core/ocl.hpp"
4748

4849
namespace cv
4950
{

modules/tracking/src/tldDetector.cpp

Lines changed: 227 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,119 @@ namespace cv
6565
// Calculate Relative similarity of the patch (NN-Model)
6666
double TLDDetector::Sr(const Mat_<uchar>& patch)
6767
{
68+
/*
69+
int64 e1, e2;
70+
float t;
71+
e1 = getTickCount();
6872
double splus = 0.0, sminus = 0.0;
6973
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
7074
splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
7175
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
7276
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
77+
e2 = getTickCount();
78+
t = (e2 - e1) / getTickFrequency()*1000.0;
79+
printf("Sr: %f\n", t);
80+
if (splus + sminus == 0.0)
81+
return 0.0;
82+
return splus / (sminus + splus);
83+
*/
84+
int64 e1, e2;
85+
float t;
86+
e1 = getTickCount();
87+
double splus = 0.0, sminus = 0.0;
88+
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
89+
for (int i = 0; i < *posNum; i++)
90+
{
91+
modelSample.data = &(posExp->data[i * 225]);
92+
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
93+
}
94+
for (int i = 0; i < *negNum; i++)
95+
{
96+
modelSample.data = &(negExp->data[i * 225]);
97+
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
98+
}
99+
e2 = getTickCount();
100+
t = (e2 - e1) / getTickFrequency()*1000.0;
101+
printf("Sr CPU: %f\n", t);
102+
if (splus + sminus == 0.0)
103+
return 0.0;
104+
return splus / (sminus + splus);
105+
}
106+
107+
double TLDDetector::ocl_Sr(const Mat_<uchar>& patch)
108+
{
109+
int64 e1, e2, e3, e4;
110+
float t;
111+
e1 = getTickCount();
112+
double splus = 0.0, sminus = 0.0;
113+
114+
e3 = getTickCount();
115+
116+
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
117+
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
118+
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
119+
UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
120+
121+
122+
ocl::Kernel k;
123+
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
124+
String error;
125+
ocl::Program prog(src, NULL, error);
126+
k.create("NCC", prog);
127+
if (k.empty())
128+
printf("Kernel create failed!!!\n");
129+
k.args(
130+
ocl::KernelArg::PtrReadOnly(devPatch),
131+
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
132+
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
133+
ocl::KernelArg::PtrWriteOnly(devNCC),
134+
(int)posNum,
135+
(int)negNum);
136+
137+
e4 = getTickCount();
138+
t = (e4 - e3) / getTickFrequency()*1000.0;
139+
//printf("Mem Cpy GPU: %f\n", t);
140+
141+
size_t globSize = 1000;
142+
size_t localSize = 128;
143+
e3 = getTickCount();
144+
if (!k.run(1, &globSize, &localSize, true))
145+
printf("Kernel Run Error!!!");
146+
e4 = getTickCount();
147+
t = (e4 - e3) / getTickFrequency()*1000.0;
148+
//printf("Kernel Run GPU: %f\n", t);
149+
150+
e3 = getTickCount();
151+
Mat resNCC = devNCC.getMat(ACCESS_READ);
152+
e4 = getTickCount();
153+
t = (e4 - e3) / getTickFrequency()*1000.0;
154+
//printf("Read Mem GPU: %f\n", t);
155+
156+
////Compare
157+
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
158+
//for (int i = 0; i < 200; i+=17)
159+
//{
160+
// modelSample.data = &(posExp->data[i * 225]);
161+
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
162+
//}
163+
164+
//for (int i = 0; i < 200; i+=23)
165+
//{
166+
// modelSample.data = &(negExp->data[i * 225]);
167+
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
168+
//}
169+
170+
171+
for (int i = 0; i < *posNum; i++)
172+
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));
173+
174+
for (int i = 0; i < *negNum; i++)
175+
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i+500) +1.0));
176+
177+
e2 = getTickCount();
178+
t = (e2 - e1) / getTickFrequency()*1000.0;
179+
//printf("Sr GPU: %f\n\n", t);
180+
73181
if (splus + sminus == 0.0)
74182
return 0.0;
75183
return splus / (sminus + splus);
@@ -78,6 +186,10 @@ namespace cv
78186
// Calculate Conservative similarity of the patch (NN-Model)
79187
double TLDDetector::Sc(const Mat_<uchar>& patch)
80188
{
189+
/*
190+
int64 e1, e2;
191+
float t;
192+
e1 = getTickCount();
81193
double splus = 0.0, sminus = 0.0;
82194
int med = getMedian((*timeStampsPositive));
83195
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
@@ -87,6 +199,118 @@ namespace cv
87199
}
88200
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
89201
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
202+
e2 = getTickCount();
203+
t = (e2 - e1) / getTickFrequency()*1000.0;
204+
printf("Sc: %f\n", t);
205+
if (splus + sminus == 0.0)
206+
return 0.0;
207+
208+
return splus / (sminus + splus);
209+
*/
210+
211+
int64 e1, e2;
212+
float t;
213+
e1 = getTickCount();
214+
double splus = 0.0, sminus = 0.0;
215+
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
216+
int med = getMedian((*timeStampsPositive));
217+
for (int i = 0; i < *posNum; i++)
218+
{
219+
if ((int)(*timeStampsPositive)[i] <= med)
220+
{
221+
modelSample.data = &(posExp->data[i * 225]);
222+
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
223+
}
224+
}
225+
for (int i = 0; i < *negNum; i++)
226+
{
227+
modelSample.data = &(negExp->data[i * 225]);
228+
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
229+
}
230+
e2 = getTickCount();
231+
t = (e2 - e1) / getTickFrequency()*1000.0;
232+
printf("Sc: %f\n", t);
233+
if (splus + sminus == 0.0)
234+
return 0.0;
235+
236+
return splus / (sminus + splus);
237+
}
238+
239+
double TLDDetector::ocl_Sc(const Mat_<uchar>& patch)
240+
{
241+
int64 e1, e2, e3, e4;
242+
float t;
243+
e1 = getTickCount();
244+
double splus = 0.0, sminus = 0.0;
245+
246+
e3 = getTickCount();
247+
248+
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
249+
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
250+
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
251+
UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
252+
253+
254+
ocl::Kernel k;
255+
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
256+
String error;
257+
ocl::Program prog(src, NULL, error);
258+
k.create("NCC", prog);
259+
if (k.empty())
260+
printf("Kernel create failed!!!\n");
261+
k.args(
262+
ocl::KernelArg::PtrReadOnly(devPatch),
263+
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
264+
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
265+
ocl::KernelArg::PtrWriteOnly(devNCC),
266+
(int)posNum,
267+
(int)negNum);
268+
269+
e4 = getTickCount();
270+
t = (e4 - e3) / getTickFrequency()*1000.0;
271+
//printf("Mem Cpy GPU: %f\n", t);
272+
273+
size_t globSize = 1000;
274+
size_t localSize = 128;
275+
e3 = getTickCount();
276+
if (!k.run(1, &globSize, &localSize, true))
277+
printf("Kernel Run Error!!!");
278+
e4 = getTickCount();
279+
t = (e4 - e3) / getTickFrequency()*1000.0;
280+
//printf("Kernel Run GPU: %f\n", t);
281+
282+
e3 = getTickCount();
283+
Mat resNCC = devNCC.getMat(ACCESS_READ);
284+
e4 = getTickCount();
285+
t = (e4 - e3) / getTickFrequency()*1000.0;
286+
//printf("Read Mem GPU: %f\n", t);
287+
288+
////Compare
289+
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
290+
//for (int i = 0; i < 200; i+=17)
291+
//{
292+
// modelSample.data = &(posExp->data[i * 225]);
293+
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
294+
//}
295+
296+
//for (int i = 0; i < 200; i+=23)
297+
//{
298+
// modelSample.data = &(negExp->data[i * 225]);
299+
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
300+
//}
301+
302+
int med = getMedian((*timeStampsPositive));
303+
for (int i = 0; i < *posNum; i++)
304+
if ((int)(*timeStampsPositive)[i] <= med)
305+
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) +1.0));
306+
307+
for (int i = 0; i < *negNum; i++)
308+
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + 500) + 1.0));
309+
310+
e2 = getTickCount();
311+
t = (e2 - e1) / getTickFrequency()*1000.0;
312+
//printf("Sc GPU: %f\n\n", t);
313+
90314
if (splus + sminus == 0.0)
91315
return 0.0;
92316
return splus / (sminus + splus);
@@ -166,7 +390,8 @@ namespace cv
166390

167391
labPatch.rect = Rect2d(dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
168392
resample(resized_img, Rect2d(Point(dx * i, dy * j), initSize), standardPatch);
169-
tmp = Sr(standardPatch);
393+
394+
tmp = ocl_Sr(standardPatch);
170395

171396
////To fix: Check the paper; this probably causes wrong learning
172397
//
@@ -184,7 +409,7 @@ namespace cv
184409
{
185410
npos++;
186411
}
187-
tmp = Sc(standardPatch);
412+
tmp = ocl_Sc(standardPatch);
188413
if (tmp > maxSc)
189414
{
190415
maxSc = tmp;

modules/tracking/src/tldDetector.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#define OPENCV_TLD_DETECTOR
4444

4545
#include "precomp.hpp"
46+
#include "opencl_kernels_tracking.hpp"
4647
#include "tldEnsembleClassifier.hpp"
4748
#include "tldUtils.hpp"
4849

@@ -73,9 +74,13 @@ namespace cv
7374
inline double ensembleClassifierNum(const uchar* data);
7475
inline void prepareClassifiers(int rowstep);
7576
double Sr(const Mat_<uchar>& patch);
77+
double ocl_Sr(const Mat_<uchar>& patch);
7678
double Sc(const Mat_<uchar>& patch);
79+
double ocl_Sc(const Mat_<uchar>& patch);
7780

7881
std::vector<TLDEnsembleClassifier> classifiers;
82+
Mat *posExp, *negExp;
83+
int *posNum, *negNum;
7984
std::vector<Mat_<uchar> > *positiveExamples, *negativeExamples;
8085
std::vector<int> *timeStampsPositive, *timeStampsNegative;
8186
double *originalVariancePtr;
@@ -87,6 +92,7 @@ namespace cv
8792
bool isObject, shouldBeIntegrated;
8893
};
8994
bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
95+
bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
9096
protected:
9197

9298

0 commit comments

Comments
 (0)