@@ -65,11 +65,119 @@ namespace cv
6565 // Calculate Relative similarity of the patch (NN-Model)
6666 double TLDDetector::Sr (const Mat_<uchar>& patch)
6767 {
68+ /*
69+ int64 e1, e2;
70+ float t;
71+ e1 = getTickCount();
6872 double splus = 0.0, sminus = 0.0;
6973 for (int i = 0; i < (int)(*positiveExamples).size(); i++)
7074 splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
7175 for (int i = 0; i < (int)(*negativeExamples).size(); i++)
7276 sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
77+ e2 = getTickCount();
78+ t = (e2 - e1) / getTickFrequency()*1000.0;
79+ printf("Sr: %f\n", t);
80+ if (splus + sminus == 0.0)
81+ return 0.0;
82+ return splus / (sminus + splus);
83+ */
84+ int64 e1 , e2 ;
85+ float t;
86+ e1 = getTickCount ();
87+ double splus = 0.0 , sminus = 0.0 ;
88+ Mat_<uchar> modelSample (STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
89+ for (int i = 0 ; i < *posNum; i++)
90+ {
91+ modelSample.data = &(posExp->data [i * 225 ]);
92+ splus = std::max (splus, 0.5 * (NCC (modelSample, patch) + 1.0 ));
93+ }
94+ for (int i = 0 ; i < *negNum; i++)
95+ {
96+ modelSample.data = &(negExp->data [i * 225 ]);
97+ sminus = std::max (sminus, 0.5 * (NCC (modelSample, patch) + 1.0 ));
98+ }
99+ e2 = getTickCount ();
100+ t = (e2 - e1 ) / getTickFrequency ()*1000.0 ;
101+ printf (" Sr CPU: %f\n " , t);
102+ if (splus + sminus == 0.0 )
103+ return 0.0 ;
104+ return splus / (sminus + splus);
105+ }
106+
107+ double TLDDetector::ocl_Sr (const Mat_<uchar>& patch)
108+ {
109+ int64 e1 , e2 , e3 , e4 ;
110+ float t;
111+ e1 = getTickCount ();
112+ double splus = 0.0 , sminus = 0.0 ;
113+
114+ e3 = getTickCount ();
115+
116+ UMat devPatch = patch.getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
117+ UMat devPositiveSamples = posExp->getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
118+ UMat devNegativeSamples = negExp->getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
119+ UMat devNCC (1 , 2 *MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
120+
121+
122+ ocl::Kernel k;
123+ ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
124+ String error;
125+ ocl::Program prog (src, NULL , error);
126+ k.create (" NCC" , prog);
127+ if (k.empty ())
128+ printf (" Kernel create failed!!!\n " );
129+ k.args (
130+ ocl::KernelArg::PtrReadOnly (devPatch),
131+ ocl::KernelArg::PtrReadOnly (devPositiveSamples),
132+ ocl::KernelArg::PtrReadOnly (devNegativeSamples),
133+ ocl::KernelArg::PtrWriteOnly (devNCC),
134+ (int )posNum,
135+ (int )negNum);
136+
137+ e4 = getTickCount ();
138+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
139+ // printf("Mem Cpy GPU: %f\n", t);
140+
141+ size_t globSize = 1000 ;
142+ size_t localSize = 128 ;
143+ e3 = getTickCount ();
144+ if (!k.run (1 , &globSize, &localSize, true ))
145+ printf (" Kernel Run Error!!!" );
146+ e4 = getTickCount ();
147+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
148+ // printf("Kernel Run GPU: %f\n", t);
149+
150+ e3 = getTickCount ();
151+ Mat resNCC = devNCC.getMat (ACCESS_READ);
152+ e4 = getTickCount ();
153+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
154+ // printf("Read Mem GPU: %f\n", t);
155+
156+ // //Compare
157+ // Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
158+ // for (int i = 0; i < 200; i+=17)
159+ // {
160+ // modelSample.data = &(posExp->data[i * 225]);
161+ // printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
162+ // }
163+
164+ // for (int i = 0; i < 200; i+=23)
165+ // {
166+ // modelSample.data = &(negExp->data[i * 225]);
167+ // printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
168+ // }
169+
170+
171+ for (int i = 0 ; i < *posNum; i++)
172+ splus = std::max (splus, 0.5 * (resNCC.at <float >(i) + 1.0 ));
173+
174+ for (int i = 0 ; i < *negNum; i++)
175+ sminus = std::max (sminus, 0.5 * (resNCC.at <float >(i+500 ) +1.0 ));
176+
177+ e2 = getTickCount ();
178+ t = (e2 - e1 ) / getTickFrequency ()*1000.0 ;
179+ // printf("Sr GPU: %f\n\n", t);
180+
73181 if (splus + sminus == 0.0 )
74182 return 0.0 ;
75183 return splus / (sminus + splus);
@@ -78,6 +186,10 @@ namespace cv
78186 // Calculate Conservative similarity of the patch (NN-Model)
79187 double TLDDetector::Sc (const Mat_<uchar>& patch)
80188 {
189+ /*
190+ int64 e1, e2;
191+ float t;
192+ e1 = getTickCount();
81193 double splus = 0.0, sminus = 0.0;
82194 int med = getMedian((*timeStampsPositive));
83195 for (int i = 0; i < (int)(*positiveExamples).size(); i++)
@@ -87,6 +199,118 @@ namespace cv
87199 }
88200 for (int i = 0; i < (int)(*negativeExamples).size(); i++)
89201 sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
202+ e2 = getTickCount();
203+ t = (e2 - e1) / getTickFrequency()*1000.0;
204+ printf("Sc: %f\n", t);
205+ if (splus + sminus == 0.0)
206+ return 0.0;
207+
208+ return splus / (sminus + splus);
209+ */
210+
211+ int64 e1 , e2 ;
212+ float t;
213+ e1 = getTickCount ();
214+ double splus = 0.0 , sminus = 0.0 ;
215+ Mat_<uchar> modelSample (STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
216+ int med = getMedian ((*timeStampsPositive));
217+ for (int i = 0 ; i < *posNum; i++)
218+ {
219+ if ((int )(*timeStampsPositive)[i] <= med)
220+ {
221+ modelSample.data = &(posExp->data [i * 225 ]);
222+ splus = std::max (splus, 0.5 * (NCC (modelSample, patch) + 1.0 ));
223+ }
224+ }
225+ for (int i = 0 ; i < *negNum; i++)
226+ {
227+ modelSample.data = &(negExp->data [i * 225 ]);
228+ sminus = std::max (sminus, 0.5 * (NCC (modelSample, patch) + 1.0 ));
229+ }
230+ e2 = getTickCount ();
231+ t = (e2 - e1 ) / getTickFrequency ()*1000.0 ;
232+ printf (" Sc: %f\n " , t);
233+ if (splus + sminus == 0.0 )
234+ return 0.0 ;
235+
236+ return splus / (sminus + splus);
237+ }
238+
239+ double TLDDetector::ocl_Sc (const Mat_<uchar>& patch)
240+ {
241+ int64 e1 , e2 , e3 , e4 ;
242+ float t;
243+ e1 = getTickCount ();
244+ double splus = 0.0 , sminus = 0.0 ;
245+
246+ e3 = getTickCount ();
247+
248+ UMat devPatch = patch.getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
249+ UMat devPositiveSamples = posExp->getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
250+ UMat devNegativeSamples = negExp->getUMat (ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
251+ UMat devNCC (1 , 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
252+
253+
254+ ocl::Kernel k;
255+ ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
256+ String error;
257+ ocl::Program prog (src, NULL , error);
258+ k.create (" NCC" , prog);
259+ if (k.empty ())
260+ printf (" Kernel create failed!!!\n " );
261+ k.args (
262+ ocl::KernelArg::PtrReadOnly (devPatch),
263+ ocl::KernelArg::PtrReadOnly (devPositiveSamples),
264+ ocl::KernelArg::PtrReadOnly (devNegativeSamples),
265+ ocl::KernelArg::PtrWriteOnly (devNCC),
266+ (int )posNum,
267+ (int )negNum);
268+
269+ e4 = getTickCount ();
270+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
271+ // printf("Mem Cpy GPU: %f\n", t);
272+
273+ size_t globSize = 1000 ;
274+ size_t localSize = 128 ;
275+ e3 = getTickCount ();
276+ if (!k.run (1 , &globSize, &localSize, true ))
277+ printf (" Kernel Run Error!!!" );
278+ e4 = getTickCount ();
279+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
280+ // printf("Kernel Run GPU: %f\n", t);
281+
282+ e3 = getTickCount ();
283+ Mat resNCC = devNCC.getMat (ACCESS_READ);
284+ e4 = getTickCount ();
285+ t = (e4 - e3 ) / getTickFrequency ()*1000.0 ;
286+ // printf("Read Mem GPU: %f\n", t);
287+
288+ // //Compare
289+ // Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
290+ // for (int i = 0; i < 200; i+=17)
291+ // {
292+ // modelSample.data = &(posExp->data[i * 225]);
293+ // printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
294+ // }
295+
296+ // for (int i = 0; i < 200; i+=23)
297+ // {
298+ // modelSample.data = &(negExp->data[i * 225]);
299+ // printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
300+ // }
301+
302+ int med = getMedian ((*timeStampsPositive));
303+ for (int i = 0 ; i < *posNum; i++)
304+ if ((int )(*timeStampsPositive)[i] <= med)
305+ splus = std::max (splus, 0.5 * (resNCC.at <float >(i) +1.0 ));
306+
307+ for (int i = 0 ; i < *negNum; i++)
308+ sminus = std::max (sminus, 0.5 * (resNCC.at <float >(i + 500 ) + 1.0 ));
309+
310+ e2 = getTickCount ();
311+ t = (e2 - e1 ) / getTickFrequency ()*1000.0 ;
312+ // printf("Sc GPU: %f\n\n", t);
313+
90314 if (splus + sminus == 0.0 )
91315 return 0.0 ;
92316 return splus / (sminus + splus);
@@ -166,7 +390,8 @@ namespace cv
166390
167391 labPatch.rect = Rect2d (dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
168392 resample (resized_img, Rect2d (Point (dx * i, dy * j), initSize), standardPatch);
169- tmp = Sr (standardPatch);
393+
394+ tmp = ocl_Sr (standardPatch);
170395
171396 // //To fix: Check the paper, probably this cause wrong learning
172397 //
@@ -184,7 +409,7 @@ namespace cv
184409 {
185410 npos++;
186411 }
187- tmp = Sc (standardPatch);
412+ tmp = ocl_Sc (standardPatch);
188413 if (tmp > maxSc)
189414 {
190415 maxSc = tmp;
0 commit comments