diff --git a/README.md b/README.md index 2210586f653..c3456233c5d 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,11 @@ If you prefer using the gui version of cmake (cmake-gui), then, you can add `ope 6. press the `configure` button followed by the `generate` button (the first time, you will be asked which makefile style to use) 7. build the `opencv` core with the method you chose (make and make install if you chose Unix makefile at step 6) + +### Update the repository documentation + +In order to keep a clean overview containing all contributed modules, the following files need to be created or adapted. + +1. Update the README.md file under the modules folder. Here you add your module with a single-line description. + +2. Add a README.md inside your own module folder. This README explains which functionality (separate functions) is available, links to the corresponding samples and explains in somewhat more detail what the module is expected to do. If any extra requirements are needed to build the module without problems, add them here as well. \ No newline at end of file diff --git a/modules/README.md b/modules/README.md index db0ca01e696..667997fc062 100644 --- a/modules/README.md +++ b/modules/README.md @@ -1 +1,49 @@ -Please put your module here. \ No newline at end of file +An overview of the contrib modules and a small explanation +---------------------------------------------------------- + +This list gives an overview of all modules available inside the contrib repository. +These are also the correct names for disabling the building of a specific module by adding + +``` +$ cmake -D OPENCV_EXTRA_MODULES_PATH=<opencv_contrib>/modules -D BUILD_opencv_reponame=OFF +``` + +1. **opencv_adas**: Advanced Driver Assistance Systems module with Forward Collision Warning. + +2. **opencv_bgsegm**: Improved Adaptive Background Mixture Model for Real-time Tracking / Visual Tracking of Human Visitors under Variable-Lighting Conditions. + +3. **opencv_bioinspired**: Biologically inspired vision models and derived tools. + +4. **opencv_ccalib**: Custom Calibration Pattern for 3D reconstruction. + +5. **opencv_cvv**: GUI for Interactive Visual Debugging of Computer Vision Programs. + +6. **opencv_datasets**: Tools for working with different datasets. + +7. **opencv_face**: Recently added face recognition software which is not yet stabilized. + +8. **opencv_line_descriptor**: Binary descriptors for lines extracted from an image. + +9. **opencv_matlab**: OpenCV Matlab Code Generator. + +10. **opencv_optflow**: Optical Flow Algorithms for tracking points. + +11. **opencv_reg**: Image Registration module. + +12. **opencv_rgbd**: RGB-Depth Processing module. + +13. **opencv_saliency**: Saliency API, for estimating where humans focus in a given scene. + +14. **opencv_surface_matching**: Surface Matching Algorithm Through 3D Features. + +15. **opencv_text**: Scene Text Detection and Recognition in Natural Scene Images. + +16. **opencv_tracking**: Long-term optical tracking API. + +17. **opencv_xfeatures2d**: Extra 2D Features Framework containing experimental and non-free 2D feature algorithms. + +18. **opencv_ximgproc**: Extended Image Processing: Structured Forests / Domain Transform Filter / Guided Filter / Adaptive Manifold Filter / Joint Bilateral Filter / Superpixels. + +19. **opencv_xobjdetect**: Integral Channel Features Detector Framework. + +20. **opencv_xphoto**: Additional photo processing algorithms: Color balance / Denoising / Inpainting.
\ No newline at end of file diff --git a/modules/adas/README.md b/modules/adas/README.md new file mode 100644 index 00000000000..7ccfb3a0fbd --- /dev/null +++ b/modules/adas/README.md @@ -0,0 +1,2 @@ +ADAS: Advanced Driver Assistance Systems module with Forward Collision Warning +============================================================================== \ No newline at end of file diff --git a/modules/adas/tools/fcw_detect/fcw_detect.cpp b/modules/adas/tools/fcw_detect/fcw_detect.cpp index 47030b06150..318393a5568 100644 --- a/modules/adas/tools/fcw_detect/fcw_detect.cpp +++ b/modules/adas/tools/fcw_detect/fcw_detect.cpp @@ -48,15 +48,29 @@ static Mat visualize(const Mat &image, const vector &objects) } return img; } +static bool read_window_size(const char *str, int *rows, int *cols) +{ + int pos = 0; + if( sscanf(str, "%dx%d%n", rows, cols, &pos) != 2 || str[pos] != '\0' || + *rows <= 0 || *cols <= 0) + { + return false; + } + return true; +} int main(int argc, char *argv[]) { const string keys = - "{help | | print this message}" - "{model_filename | model.xml | filename for reading model}" - "{image_path | test.png | path to image for detection}" - "{out_image_path | out.png | path to image for output}" - "{threshold | 0.0 | threshold for cascade}" + "{help | | print this message}" + "{model_filename | model.xml | filename for reading model}" + "{image_path | test.png | path to image for detection}" + "{out_image_path | out.png | path to image for output}" + "{threshold | 0.0 | threshold for cascade}" + "{step | 8 | sliding window step}" + "{min_window_size | 40x40 | min window size in pixels}" + "{max_window_size | 300x300 | max window size in pixels}" + "{is_grayscale | false | read the image as grayscale}" ; CommandLineParser parser(argc, argv, keys); @@ -71,7 +85,31 @@ int main(int argc, char *argv[]) string model_filename = parser.get("model_filename"); string image_path = parser.get("image_path"); string out_image_path = parser.get("out_image_path"); + bool is_grayscale = parser.get("is_grayscale"); float threshold = parser.get("threshold"); + int step = parser.get("step"); + + int min_rows, min_cols, max_rows, max_cols; + string min_window_size = parser.get("min_window_size"); + if( !read_window_size(min_window_size.c_str(), &min_rows, + &min_cols) ) + { + cerr << "Error reading min window size from `" << min_window_size << "`" << endl; + return 1; + } + string max_window_size = parser.get("max_window_size"); + if( !read_window_size(max_window_size.c_str(), &max_rows, + &max_cols) ) + { + cerr << "Error reading max window size from `" << max_window_size << "`" << endl; + return 1; + } + + int color; + if(is_grayscale == false) + color = cv::IMREAD_COLOR; + else + color = cv::IMREAD_GRAYSCALE; if( !parser.check() ) @@ -85,8 +123,10 @@ int main(int argc, char *argv[]) detector.read(fs["icfdetector"]); fs.release(); vector objects; - Mat img = imread(image_path); - detector.detect(img, objects, 1.1f, Size(40, 40), - Size(300, 300), threshold); + Mat img = imread(image_path, color); + std::vector values; + detector.detect(img, objects, 1.1f, Size(min_cols, min_rows), Size(max_cols, max_rows), threshold, step, values); imwrite(out_image_path, visualize(img, objects)); + + } diff --git a/modules/adas/tools/fcw_train/fcw_train.cpp b/modules/adas/tools/fcw_train/fcw_train.cpp index 2b51cd61a57..a7f0e81d081 100644 --- a/modules/adas/tools/fcw_train/fcw_train.cpp +++ b/modules/adas/tools/fcw_train/fcw_train.cpp @@ -26,9 +26,14 @@ using cv::imread; #include using cv::CommandLineParser; 
using cv::FileStorage; +#include + +#include // std::time +#include // std::rand, std::srand #include + using cv::xobjdetect::ICFDetectorParams; using cv::xobjdetect::ICFDetector; using cv::xobjdetect::WaldBoost; @@ -46,8 +51,11 @@ static bool read_model_size(const char *str, int *rows, int *cols) return true; } +static int randomPred (int i) { return std::rand()%i;} + int main(int argc, char *argv[]) { + const string keys = "{help | | print this message}" "{pos_path | pos | path to training object samples}" @@ -57,8 +65,15 @@ int main(int argc, char *argv[]) "{weak_count | 100 | number of weak classifiers in cascade}" "{model_size | 40x40 | model size in pixels}" "{model_filename | model.xml | filename for saving model}" + "{features_type | icf | features type, \"icf\" or \"acf\"}" + "{alpha | 0.02 | alpha value}" + "{is_grayscale | false | read the image as grayscale}" + "{use_fast_log | false | use fast log function}" + "{limit_ps | -1 | limit to positive samples (-1 means all)}" + "{limit_bg | -1 | limit to negative samples (-1 means all)}" ; + CommandLineParser parser(argc, argv, keys); parser.about("FCW trainer"); @@ -76,7 +91,14 @@ int main(int argc, char *argv[]) params.feature_count = parser.get("feature_count"); params.weak_count = parser.get("weak_count"); params.bg_per_image = parser.get("bg_per_image"); - + params.features_type = parser.get("features_type"); + params.alpha = parser.get("alpha"); + params.is_grayscale = parser.get("is_grayscale"); + params.use_fast_log = parser.get("use_fast_log"); + + int limit_ps = parser.get("limit_ps"); + int limit_bg = parser.get("limit_bg"); + string model_size = parser.get("model_size"); if( !read_model_size(model_size.c_str(), ¶ms.model_n_rows, ¶ms.model_n_cols) ) @@ -84,7 +106,7 @@ int main(int argc, char *argv[]) cerr << "Error reading model size from `" << model_size << "`" << endl; return 1; } - + if( params.feature_count <= 0 ) { cerr << "feature_count must be positive number" << endl; @@ -97,20 +119,67 @@ int main(int argc, char *argv[]) return 1; } - if( params.bg_per_image <= 0 ) + if( params.features_type != "icf" && params.features_type != "acf" ) { - cerr << "bg_per_image must be positive number" << endl; + cerr << "features_type must be \"icf\" or \"acf\"" << endl; + return 1; + } + if( params.alpha <= 0 ) + { + cerr << "alpha must be positive float number" << endl; return 1; } - if( !parser.check() ) { parser.printErrors(); return 1; } + + std::vector pos_filenames; + glob(pos_path, pos_filenames); + + std::vector bg_filenames; + glob(bg_path, bg_filenames); + + if(limit_ps != -1 && (int)pos_filenames.size() > limit_ps) + pos_filenames.erase(pos_filenames.begin()+limit_ps, pos_filenames.end()); + if(limit_bg != -1 && (int)bg_filenames.size() > limit_bg) + bg_filenames.erase(bg_filenames.begin()+limit_bg, bg_filenames.end()); + + //random pick input images + bool random_shuffle = false; + if(random_shuffle) + { + std::srand ( unsigned ( std::time(0) ) ); + std::random_shuffle ( pos_filenames.begin(), pos_filenames.end(), randomPred ); + std::random_shuffle ( bg_filenames.begin(), bg_filenames.end(), randomPred ); + } + + int samples_size = (int)((params.bg_per_image * bg_filenames.size()) + pos_filenames.size()); + int features_size = params.feature_count; + int max_features_allowed = (int)(INT_MAX/(sizeof(int)* samples_size)); + int max_samples_allowed = (int)(INT_MAX/(sizeof(int)* features_size)); + int total_samples = (int)((params.bg_per_image * bg_filenames.size()) + pos_filenames.size()); + + + if(total_samples 
>max_samples_allowed) + { + CV_Error_(1, ("exceeded maximum number of samples. Maximum number of samples with %d features is %d, you have %d (%d positive samples + (%d bg * %d bg_per_image))\n",features_size,max_samples_allowed,total_samples,pos_filenames.size(),bg_filenames.size(),params.bg_per_image )); + } + + if(params.feature_count >max_features_allowed) + { + CV_Error_(1, ("exceeded maximum number of features. Maximum number of features with %d samples is %d, you have %d\n",samples_size,max_features_allowed, features_size )); + } + + std::cout< It applies a spectral whithening (mid-frequency details enhancement) @@ -176,7 +176,6 @@ class CV_EXPORTS_W Retina : public Algorithm { * => if the xml file does not exist, then default setup is applied * => warning, Exceptions are thrown if read XML file is not valid * @param newParameters : a parameters structures updated with the new target configuration - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error */ CV_WRAP virtual void setup(RetinaParameters newParameters)=0; @@ -193,7 +192,7 @@ class CV_EXPORTS_W Retina : public Algorithm { /** * write xml/yml formated parameters information - * @rparam fs : the filename of the xml file that will be open and writen with formatted parameters information + * @param fs : the filename of the xml file that will be open and writen with formatted parameters information */ CV_WRAP virtual void write( String fs ) const=0; diff --git a/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp index 5555292ca99..8ee19d8337f 100644 --- a/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/retinafasttonemapping.hpp @@ -80,7 +80,7 @@ namespace cv{ namespace bioinspired{ /** - * @class RetinaFastToneMappingImpl a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV. + * a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV. * This algorithm is already implemented in thre Retina class (retina::applyFastToneMapping) but used it does not require all the retina model to be allocated. This allows a light memory use for low memory devices (smartphones, etc. * As a summary, these are the model properties: * => 2 stages of local luminance adaptation with a different local neighborhood for each. 
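The RetinaFastToneMapping wrapper documented above is meant to be used without allocating a full retina model. Below is a minimal usage sketch (not part of this patch), assuming the createRetinaFastToneMapping factory and the setup/applyFastToneMapping methods declared in this header; the image file names are placeholders.

```cpp
#include "opencv2/bioinspired.hpp"
#include "opencv2/imgcodecs.hpp"

using namespace cv;

int main()
{
    // placeholder input image to be tone mapped
    Mat input = imread("input.png");
    if (input.empty())
        return 1;

    // allocate only the light-weight tone mapping wrapper, not the full retina model
    Ptr<bioinspired::RetinaFastToneMapping> toneMapper =
            bioinspired::createRetinaFastToneMapping(input.size());

    // keep the default local luminance adaptation parameters
    toneMapper->setup();

    Mat toneMapped;
    toneMapper->applyFastToneMapping(input, toneMapped);
    imwrite("tonemapped.png", toneMapped);
    return 0;
}
```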
diff --git a/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp b/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp index 95081758170..4dcb7d280e7 100755 --- a/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/transientareassegmentationmodule.hpp @@ -116,7 +116,7 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm * try to open an XML segmentation parameters file to adjust current segmentation instance setup * => if the xml file does not exist, then default setup is applied * => warning, Exceptions are thrown if read XML file is not valid - * @param retinaParameterFile : the parameters filename + * @param segmentationParameterFile : the parameters filename * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error */ CV_WRAP virtual void setup(String segmentationParameterFile="", const bool applyDefaultSetupOnFailure=true)=0; @@ -135,7 +135,6 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm * => if the xml file does not exist, then default setup is applied * => warning, Exceptions are thrown if read XML file is not valid * @param newParameters : a parameters structures updated with the new target configuration - * @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error */ CV_WRAP virtual void setup(SegmentationParameters newParameters)=0; @@ -152,7 +151,7 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm /** * write xml/yml formated parameters information - * @rparam fs : the filename of the xml file that will be open and writen with formatted parameters information + * @param fs : the filename of the xml file that will be open and writen with formatted parameters information */ CV_WRAP virtual void write( String fs ) const=0; @@ -181,9 +180,8 @@ class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm CV_WRAP virtual void clearAllBuffers()=0; }; - /** - * allocator - * @param Size : size of the images input to segment (output will be the same size) + /** allocator + * @param inputSize : size of the images input to segment (output will be the same size) */ CV_EXPORTS_W Ptr createTransientAreasSegmentationModule(Size inputSize); diff --git a/modules/ccalib/README.md b/modules/ccalib/README.md new file mode 100644 index 00000000000..5b6289961b2 --- /dev/null +++ b/modules/ccalib/README.md @@ -0,0 +1,2 @@ +Custom Calibration Pattern for 3D reconstruction +================================================ diff --git a/modules/cvv/README.md b/modules/cvv/README.md index 5427edeb0bc..f6f58acf2d9 100644 --- a/modules/cvv/README.md +++ b/modules/cvv/README.md @@ -1,2 +1,2 @@ -CVVisual -======== +GUI for Interactive Visual Debugging of Computer Vision Programs +================================================================ diff --git a/modules/datasets/CMakeLists.txt b/modules/datasets/CMakeLists.txt index d01e5b2884b..0102b7a9245 100644 --- a/modules/datasets/CMakeLists.txt +++ b/modules/datasets/CMakeLists.txt @@ -1,4 +1,4 @@ set(the_description "datasets framework") -ocv_define_module(datasets opencv_core opencv_face opencv_ml opencv_flann) +ocv_define_module(datasets opencv_core opencv_face opencv_ml opencv_flann opencv_text) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4267) # flann, Win64 diff --git a/modules/datasets/doc/datasets.rst b/modules/datasets/doc/datasets.rst index 469bd0c4095..cb4cedfd010 100644 
--- a/modules/datasets/doc/datasets.rst +++ b/modules/datasets/doc/datasets.rst @@ -4,16 +4,24 @@ datasets. Framework for working with different datasets .. highlight:: cpp -The datasets module includes classes for working with different datasets: load data, evaluate different algorithms on them, etc. +The datasets module includes classes for working with different datasets: load data, evaluate different algorithms on them, contains benchmarks, etc. + +It is planned to have: + + * basic: loading code for all datasets to help start work with them. + * next stage: quick benchmarks for all datasets to show how to solve them using OpenCV and implement evaluation code. + * finally: implement on OpenCV state-of-the-art algorithms, which solve these tasks. .. toctree:: :hidden: datasets/ar_hmdb datasets/ar_sports + datasets/fr_adience datasets/fr_lfw datasets/gr_chalearn datasets/gr_skig + datasets/hpe_humaneva datasets/hpe_parse datasets/ir_affine datasets/ir_robot @@ -33,14 +41,16 @@ The datasets module includes classes for working with different datasets: load d Action Recognition ------------------ - :doc:`datasets/ar_hmdb` + :doc:`datasets/ar_hmdb` [#f1]_ :doc:`datasets/ar_sports` Face Recognition ---------------- - :doc:`datasets/fr_lfw` + :doc:`datasets/fr_adience` + + :doc:`datasets/fr_lfw` [#f1]_ Gesture Recognition ------------------- @@ -52,6 +62,8 @@ Gesture Recognition Human Pose Estimation --------------------- + :doc:`datasets/hpe_humaneva` + :doc:`datasets/hpe_parse` Image Registration @@ -80,14 +92,14 @@ Object Recognition :doc:`datasets/or_imagenet` - :doc:`datasets/or_mnist` + :doc:`datasets/or_mnist` [#f2]_ :doc:`datasets/or_sun` Pedestrian Detection -------------------- - :doc:`datasets/pd_caltech` + :doc:`datasets/pd_caltech` [#f2]_ SLAM ---- @@ -101,5 +113,9 @@ Text Recognition :doc:`datasets/tr_chars` - :doc:`datasets/tr_svt` + :doc:`datasets/tr_svt` [#f1]_ + +*Footnotes* + .. [#f1] Benchmark implemented + .. [#f2] Not used in Vision Challenge diff --git a/modules/datasets/doc/datasets/ar_hmdb.rst b/modules/datasets/doc/datasets/ar_hmdb.rst index 9cd1c78d3c1..dd4e297f232 100644 --- a/modules/datasets/doc/datasets/ar_hmdb.rst +++ b/modules/datasets/doc/datasets/ar_hmdb.rst @@ -17,7 +17,7 @@ _`"HMDB: A Large Human Motion Database"`: http://serre-lab.clps.brown.edu/resour Benchmark """"""""" -For this dataset was implemented benchmark, which gives accuracy: 0.107407 (using precomputed HOG/HOF "STIP" features from site, averaging for 3 splits) +For this dataset was implemented benchmark with accuracy: 0.107407 (using precomputed HOG/HOF "STIP" features from site, averaging for 3 splits) To run this benchmark execute: @@ -27,3 +27,10 @@ To run this benchmark execute: (precomputed features should be unpacked in the same folder: /home/user/path_to_unpacked_folders/hmdb51_org_stips/) +**References:** + +.. [Kuehne11] H. Kuehne, H. Jhuang, E. Garrote, T. Poggio, and T. Serre. HMDB: A Large Video Database for Human Motion Recognition. ICCV, 2011 + +.. [Laptev08] I. Laptev, M. Marszalek, C. Schmid, and B. Rozenfeld. Learning Realistic Human Actions From Movies. CVPR, 2008 + + diff --git a/modules/datasets/doc/datasets/ar_sports.rst b/modules/datasets/doc/datasets/ar_sports.rst index 717651f8ea7..eef86c7ff40 100644 --- a/modules/datasets/doc/datasets/ar_sports.rst +++ b/modules/datasets/doc/datasets/ar_sports.rst @@ -12,3 +12,7 @@ _`"Sports-1M Dataset"`: http://cs.stanford.edu/people/karpathy/deepvideo/ 2. 
To load data run: ./opencv/build/bin/example_datasets_ar_sports -p=/home/user/path_to_downloaded_folders/ +**References:** + +.. [KarpathyCVPR14] Andrej Karpathy and George Toderici and Sanketh Shetty and Thomas Leung and Rahul Sukthankar and Li Fei-Fei. Large-scale Video Classification with Convolutional Neural Networks. CVPR, 2014 + diff --git a/modules/datasets/doc/datasets/fr_adience.rst b/modules/datasets/doc/datasets/fr_adience.rst new file mode 100644 index 00000000000..2bde5ecbda5 --- /dev/null +++ b/modules/datasets/doc/datasets/fr_adience.rst @@ -0,0 +1,20 @@ +Adience +======= +.. ocv:class:: FR_adience + +Implements loading dataset: + +_`"Adience"`: http://www.openu.ac.il/home/hassner/Adience/data.html + +.. note:: Usage + + 1. From link above download any dataset file: faces.tar.gz\\aligned.tar.gz and files with splits: fold_0_data.txt-fold_4_data.txt, fold_frontal_0_data.txt-fold_frontal_4_data.txt. (For face recognition task another splits should be created) + + 2. Unpack dataset file to some folder and place split files into the same folder. + + 3. To load data run: ./opencv/build/bin/example_datasets_fr_adience -p=/home/user/path_to_created_folder/ + +**References:** + +.. [Eidinger] E. Eidinger, R. Enbar, and T. Hassner. Age and Gender Estimation of Unfiltered Faces + diff --git a/modules/datasets/doc/datasets/fr_lfw.rst b/modules/datasets/doc/datasets/fr_lfw.rst index 00182a1792b..882c7a1823c 100644 --- a/modules/datasets/doc/datasets/fr_lfw.rst +++ b/modules/datasets/doc/datasets/fr_lfw.rst @@ -8,7 +8,7 @@ _`"Labeled Faces in the Wild"`: http://vis-www.cs.umass.edu/lfw/ .. note:: Usage - 1. From link above download any dataset file: lfw.tgz\lfwa.tar.gz\lfw-deepfunneled.tgz\lfw-funneled.tgz and files with pairs: 10 test splits: pairs.txt and developer train split: pairsDevTrain.txt. + 1. From link above download any dataset file: lfw.tgz\\lfwa.tar.gz\\lfw-deepfunneled.tgz\\lfw-funneled.tgz and files with pairs: 10 test splits: pairs.txt and developer train split: pairsDevTrain.txt. 2. Unpack dataset file and place pairs.txt and pairsDevTrain.txt in created folder. @@ -17,7 +17,7 @@ _`"Labeled Faces in the Wild"`: http://vis-www.cs.umass.edu/lfw/ Benchmark """"""""" -For this dataset was implemented benchmark, which gives accuracy: 0.623833 +- 0.005223 (train split: pairsDevTrain.txt, dataset: lfwa) +For this dataset was implemented benchmark with accuracy: 0.623833 +- 0.005223 (train split: pairsDevTrain.txt, dataset: lfwa) To run this benchmark execute: @@ -25,3 +25,7 @@ To run this benchmark execute: ./opencv/build/bin/example_datasets_fr_lfw_benchmark -p=/home/user/path_to_unpacked_folder/lfw2/ +**References:** + +.. [Huang07] G.B. Huang, M. Ramesh, T. Berg, and E. Learned-Miller. Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments. 2007 + diff --git a/modules/datasets/doc/datasets/gr_chalearn.rst b/modules/datasets/doc/datasets/gr_chalearn.rst index a91dc7c6015..ae83faad83e 100644 --- a/modules/datasets/doc/datasets/gr_chalearn.rst +++ b/modules/datasets/doc/datasets/gr_chalearn.rst @@ -16,3 +16,7 @@ _`"ChaLearn Looking at People"`: http://gesture.chalearn.org/ 4. To load data run: ./opencv/build/bin/example_datasets_gr_chalearn -p=/home/user/path_to_unpacked_folders/ +**References:** + +.. [Escalera14] S. Escalera, X. Baró, J. Gonzàlez, M.A. Bautista, M. Madadi, M. Reyes, V. Ponce-López, H.J. Escalante, J. Shotton, I. 
Guyon, "ChaLearn Looking at People Challenge 2014: Dataset and Results", ECCV Workshops, 2014 + diff --git a/modules/datasets/doc/datasets/gr_skig.rst b/modules/datasets/doc/datasets/gr_skig.rst index 483f20137ed..cdf14735561 100644 --- a/modules/datasets/doc/datasets/gr_skig.rst +++ b/modules/datasets/doc/datasets/gr_skig.rst @@ -14,3 +14,7 @@ _`"Sheffield Kinect Gesture Dataset"`: http://lshao.staff.shef.ac.uk/data/Sheffi 3. To load data run: ./opencv/build/bin/example_datasets_gr_skig -p=/home/user/path_to_unpacked_folders/ +**References:** + +.. [Liu13] L. Liu and L. Shao, “Learning Discriminative Representations from RGB-D Video Data”, In Proc. International Joint Conference on Artificial Intelligence (IJCAI), Beijing, China, 2013. + diff --git a/modules/datasets/doc/datasets/hpe_humaneva.rst b/modules/datasets/doc/datasets/hpe_humaneva.rst new file mode 100644 index 00000000000..9a35094c01d --- /dev/null +++ b/modules/datasets/doc/datasets/hpe_humaneva.rst @@ -0,0 +1,22 @@ +HumanEva Dataset +================ +.. ocv:class:: HPE_humaneva + +Implements loading dataset: + +_`"HumanEva Dataset"`: http://humaneva.is.tue.mpg.de + +.. note:: Usage + + 1. From link above download dataset files for HumanEva-I (tar) & HumanEva-II. + + 2. Unpack them to HumanEva_1 & HumanEva_2 accordingly. + + 3. To load data run: ./opencv/build/bin/example_datasets_hpe_humaneva -p=/home/user/path_to_unpacked_folders/ + +**References:** + +.. [Sigal10] L. Sigal, A. Balan and M. J. Black. HumanEva: Synchronized Video and Motion Capture Dataset and Baseline Algorithm for Evaluation of Articulated Human Motion, In International Journal of Computer Vision, Vol. 87 (1-2), 2010 + +.. [Sigal06] L. Sigal and M. J. Black. HumanEva: Synchronized Video and Motion Capture Dataset for Evaluation of Articulated Human Motion, Techniacl Report CS-06-08, Brown University, 2006 + diff --git a/modules/datasets/doc/datasets/hpe_parse.rst b/modules/datasets/doc/datasets/hpe_parse.rst index e4f88927c9a..8ac48966d6a 100644 --- a/modules/datasets/doc/datasets/hpe_parse.rst +++ b/modules/datasets/doc/datasets/hpe_parse.rst @@ -14,3 +14,7 @@ _`"PARSE Dataset"`: http://www.ics.uci.edu/~dramanan/papers/parse/ 3. To load data run: ./opencv/build/bin/example_datasets_hpe_parse -p=/home/user/path_to_unpacked_folder/people_all/ +**References:** + +.. [Ramanan06] D. Ramanan "Learning to Parse Images of Articulated Bodies." Neural Info. Proc. Systems (NIPS) To appear. Dec 2006. + diff --git a/modules/datasets/doc/datasets/ir_affine.rst b/modules/datasets/doc/datasets/ir_affine.rst index eb40dd65d72..1bd0105ee0b 100644 --- a/modules/datasets/doc/datasets/ir_affine.rst +++ b/modules/datasets/doc/datasets/ir_affine.rst @@ -14,3 +14,7 @@ _`"Affine Covariant Regions Datasets"`: http://www.robots.ox.ac.uk/~vgg/data/dat 3. To load data, for example, for "bark", run: ./opencv/build/bin/example_datasets_ir_affine -p=/home/user/path_to_unpacked_folder/bark/ +**References:** + +.. [Mikolajczyk05] K. Mikolajczyk, T. Tuytelaars, C. Schmid, A. Zisserman, J. Matas, F. Schaffalitzky, T. Kadir, L. Van Gool. A Comparison of Affine Region Detectors. 
International Journal of Computer Vision, Volume 65, Number 1/2, page 43--72, 2005 + diff --git a/modules/datasets/doc/datasets/ir_robot.rst b/modules/datasets/doc/datasets/ir_robot.rst index 8a9295ef2bf..249d88e6c55 100644 --- a/modules/datasets/doc/datasets/ir_robot.rst +++ b/modules/datasets/doc/datasets/ir_robot.rst @@ -4,12 +4,17 @@ Robot Data Set Implements loading dataset: -_`"Robot Data Set"`: http://roboimagedata.compute.dtu.dk/?page_id=24 +_`"Robot Data Set, Point Feature Data Set – 2010"`: http://roboimagedata.compute.dtu.dk/?page_id=24 .. note:: Usage - 1. From link above download files for dataset "Point Feature Data Set – 2010": SET001_6.tar.gz-SET055_60.tar.gz (there are two data sets: - Full resolution images (1200×1600), ~500 Gb and - Half size image (600×800), ~115 Gb.) + 1. From link above download dataset files: SET001_6.tar.gz-SET055_60.tar.gz + 2. Unpack them to one folder. 3. To load data run: ./opencv/build/bin/example_datasets_ir_robot -p=/home/user/path_to_unpacked_folder/ +**References:** + +.. [aanæsinteresting] Aan{\ae}s, H. and Dahl, A.L. and Steenstrup Pedersen, K. Interesting Interest Points. International Journal of Computer Vision. 2012. + diff --git a/modules/datasets/doc/datasets/is_bsds.rst b/modules/datasets/doc/datasets/is_bsds.rst index cd4910119ba..b05a0f727de 100644 --- a/modules/datasets/doc/datasets/is_bsds.rst +++ b/modules/datasets/doc/datasets/is_bsds.rst @@ -14,3 +14,7 @@ _`"The Berkeley Segmentation Dataset and Benchmark"`: https://www.eecs.berkeley. 3. To load data run: ./opencv/build/bin/example_datasets_is_bsds -p=/home/user/path_to_unpacked_folder/BSDS300/ +**References:** + +.. [MartinFTM01] D. Martin and C. Fowlkes and D. Tal and J. Malik. A Database of Human Segmented Natural Images and its Application to Evaluating Segmentation Algorithms and Measuring Ecological Statistics. 2001 + diff --git a/modules/datasets/doc/datasets/is_weizmann.rst b/modules/datasets/doc/datasets/is_weizmann.rst index a4b57022054..ce3e71ac6dc 100644 --- a/modules/datasets/doc/datasets/is_weizmann.rst +++ b/modules/datasets/doc/datasets/is_weizmann.rst @@ -14,3 +14,7 @@ _`"Weizmann Segmentation Evaluation Database"`: http://www.wisdom.weizmann.ac.il 3. To load data, for example, for 1 object dataset, run: ./opencv/build/bin/example_datasets_is_weizmann -p=/home/user/path_to_unpacked_folder/1obj/ +**References:** + +.. [AlpertGBB07] Sharon Alpert and Meirav Galun and Ronen Basri and Achi Brandt. Image Segmentation by Probabilistic Bottom-Up Aggregation and Cue Integration. 2007 + diff --git a/modules/datasets/doc/datasets/msm_epfl.rst b/modules/datasets/doc/datasets/msm_epfl.rst index 64288e5cf69..5cf21284cd8 100644 --- a/modules/datasets/doc/datasets/msm_epfl.rst +++ b/modules/datasets/doc/datasets/msm_epfl.rst @@ -14,3 +14,7 @@ _`"EPFL Multi-View Stereo"`: http://cvlabwww.epfl.ch/~strecha/multiview/denseMVS 3. To load data, for example, for "fountain", run: ./opencv/build/bin/example_datasets_msm_epfl -p=/home/user/path_to_unpacked_folder/fountain/ +**References:** + +.. [Strecha08] C. Strecha, W. von Hansen, L. Van Gool, P. Fua, U. Thoennessen. On Benchmarking Camera Calibration and Multi-View Stereo for High Resolution Imagery. 
CVPR, 2008 + diff --git a/modules/datasets/doc/datasets/msm_middlebury.rst b/modules/datasets/doc/datasets/msm_middlebury.rst index 449d59ae20e..6e35c596569 100644 --- a/modules/datasets/doc/datasets/msm_middlebury.rst +++ b/modules/datasets/doc/datasets/msm_middlebury.rst @@ -14,3 +14,7 @@ _`"Stereo – Middlebury Computer Vision"`: http://vision.middlebury.edu/mview/ 3. To load data, for example "temple" dataset, run: ./opencv/build/bin/example_datasets_msm_middlebury -p=/home/user/path_to_unpacked_folder/temple/ +**References:** + +.. [Seitz06] S. M. Seitz, B. Curless, J. Diebel, D. Scharstein, R. Szeliski. A Comparison and Evaluation of Multi-View Stereo Reconstruction Algorithms, CVPR, 2006 + diff --git a/modules/datasets/doc/datasets/or_imagenet.rst b/modules/datasets/doc/datasets/or_imagenet.rst index 48e10739c87..b7caf7e9cf0 100644 --- a/modules/datasets/doc/datasets/or_imagenet.rst +++ b/modules/datasets/doc/datasets/or_imagenet.rst @@ -6,13 +6,34 @@ Implements loading dataset: _`"ImageNet"`: http://www.image-net.org/ -Currently implemented loading full list with urls. Planned to implement dataset from ILSVRC challenge. - .. note:: Usage - 1. From link above download dataset file: imagenet_fall11_urls.tgz + 1. From link above download dataset files: ILSVRC2010_images_train.tar\\ILSVRC2010_images_test.tar\\ILSVRC2010_images_val.tar & devkit: ILSVRC2010_devkit-1.0.tar.gz (Implemented loading of 2010 dataset as only this dataset has ground truth for test data, but structure for ILSVRC2014 is similar) + + 2. Unpack them to: some_folder/train/\\some_folder/test/\\some_folder/val & some_folder/ILSVRC2010_validation_ground_truth.txt\\some_folder/ILSVRC2010_test_ground_truth.txt. + + 3. Create file with labels: some_folder/labels.txt, for example, using :ref:`python script ` below (each file's row format: synset,labelID,description. For example: "n07751451,18,plum"). + + 4. Unpack all tar files in train. + + 5. To load data run: ./opencv/build/bin/example_datasets_or_imagenet -p=/home/user/some_folder/ + +.. _python-script: + +Python script to parse meta.mat: + +:: + + import scipy.io + meta_mat = scipy.io.loadmat("devkit-1.0/data/meta.mat") + + labels_dic = dict((m[0][1][0], m[0][0][0][0]-1) for m in meta_mat['synsets'] + label_names_dic = dict((m[0][1][0], m[0][2][0]) for m in meta_mat['synsets'] + + for label in labels_dic.keys(): + print "{0},{1},{2}".format(label, labels_dic[label], label_names_dic[label]) - 2. Unpack it. +**References:** - 3. To load data run: ./opencv/build/bin/example_datasets_or_imagenet -p=/home/user/path_to_unpacked_file/ +.. [ILSVRCarxiv14] Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei. ImageNet Large Scale Visual Recognition Challenge. 2014 diff --git a/modules/datasets/doc/datasets/or_mnist.rst b/modules/datasets/doc/datasets/or_mnist.rst index 1253cba0135..a09720d7ec1 100644 --- a/modules/datasets/doc/datasets/or_mnist.rst +++ b/modules/datasets/doc/datasets/or_mnist.rst @@ -14,3 +14,7 @@ _`"MNIST"`: http://yann.lecun.com/exdb/mnist/ 3. To load data run: ./opencv/build/bin/example_datasets_or_mnist -p=/home/user/path_to_unpacked_files/ +**References:** + +.. [LeCun98a] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, 1998. 
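Loading the MNIST data referenced above follows the same create()/load() pattern used by the other dataset loaders in this patch. The sketch below is illustrative only: it assumes the OR_mnist and OR_mnistObj declarations from the module's or_mnist.hpp header (each object carrying a label and a small image matrix), and the dataset path is a placeholder.

```cpp
#include "opencv2/datasets/or_mnist.hpp"
#include "opencv2/imgcodecs.hpp"

#include <cstdio>
#include <string>

using namespace std;
using namespace cv;
using namespace cv::datasets;

int main()
{
    // placeholder: folder containing the unpacked MNIST files
    string path = "/home/user/path_to_unpacked_files/";

    Ptr<OR_mnist> dataset = OR_mnist::create();
    dataset->load(path);

    // the loader fills the train and test splits with OR_mnistObj items
    printf("train size: %u\n", (unsigned int)dataset->getTrain().size());

    OR_mnistObj *example = static_cast<OR_mnistObj *>(dataset->getTrain()[0].get());
    printf("first label: %d\n", (int)example->label);
    imwrite("mnist_first.png", example->image); // small single-channel digit image
    return 0;
}
```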
+ diff --git a/modules/datasets/doc/datasets/or_sun.rst b/modules/datasets/doc/datasets/or_sun.rst index d6a7cdf2e00..5a58d2cb6d6 100644 --- a/modules/datasets/doc/datasets/or_sun.rst +++ b/modules/datasets/doc/datasets/or_sun.rst @@ -4,15 +4,19 @@ SUN Database Implements loading dataset: -_`"SUN Database"`: http://sundatabase.mit.edu/ - -Currently implemented loading "Scene Recognition Benchmark. SUN397". Planned to implement also "Object Detection Benchmark. SUN2012". +_`"SUN Database, Scene Recognition Benchmark. SUN397"`: http://vision.cs.princeton.edu/projects/2010/SUN/ .. note:: Usage - 1. From link above download dataset file: SUN397.tar + 1. From link above download dataset file: SUN397.tar & file with splits: Partitions.zip + + 2. Unpack SUN397.tar into folder: SUN397/ & Partitions.zip into folder: SUN397/Partitions/ + + 3. To load data run: ./opencv/build/bin/example_datasets_or_sun -p=/home/user/path_to_unpacked_files/SUN397/ + +**References:** - 2. Unpack it. +.. [Xiao10] J. Xiao, J. Hays, K. Ehinger, A. Oliva, and A. Torralba. SUN Database: Large-scale Scene Recognition from Abbey to Zoo. IEEE Conference on Computer Vision and Pattern Recognition. CVPR, 2010 - 3. To load data run: ./opencv/build/bin/example_datasets_or_sun -p=/home/user/path_to_unpacked_folder/SUN397/ +.. [Xiao14] J. Xiao, K. A. Ehinger, J. Hays, A. Torralba, and A. Oliva. SUN Database: Exploring a Large Collection of Scene Categories. International Journal of Computer Vision. IJCV, 2014 diff --git a/modules/datasets/doc/datasets/pd_caltech.rst b/modules/datasets/doc/datasets/pd_caltech.rst index 5bbf8c143c2..e1c86fe29bc 100644 --- a/modules/datasets/doc/datasets/pd_caltech.rst +++ b/modules/datasets/doc/datasets/pd_caltech.rst @@ -21,3 +21,9 @@ _`"Caltech Pedestrian Detection Benchmark"`: http://www.vision.caltech.edu/Image 3. To load data run: ./opencv/build/bin/example_datasets_pd_caltech -p=/home/user/path_to_unpacked_folders/ +**References:** + +.. [Dollár12] P. Dollár, C. Wojek, B. Schiele and P. Perona. Pedestrian Detection: An Evaluation of the State of the Art. PAMI, 2012. + +.. [DollárCVPR09] P. Dollár, C. Wojek, B. Schiele and P. Perona. Pedestrian Detection: A Benchmark. CVPR, 2009 + diff --git a/modules/datasets/doc/datasets/slam_kitti.rst b/modules/datasets/doc/datasets/slam_kitti.rst index a84ffaf2f45..be8d6eff2bd 100644 --- a/modules/datasets/doc/datasets/slam_kitti.rst +++ b/modules/datasets/doc/datasets/slam_kitti.rst @@ -14,3 +14,11 @@ _`"KITTI Vision Benchmark"`: http://www.cvlibs.net/datasets/kitti/eval_odometry. 3. To load data run: ./opencv/build/bin/example_datasets_slam_kitti -p=/home/user/path_to_unpacked_folder/dataset/ +**References:** + +.. [Geiger2012CVPR] Andreas Geiger and Philip Lenz and Raquel Urtasun. Are we ready for Autonomous Driving? The KITTI Vision Benchmark Suite. CVPR, 2012 + +.. [Geiger2013IJRR] Andreas Geiger and Philip Lenz and Christoph Stiller and Raquel Urtasun. Vision meets Robotics: The KITTI Dataset. IJRR, 2013 + +.. [Fritsch2013ITSC] Jannik Fritsch and Tobias Kuehnl and Andreas Geiger. A New Performance Measure and Evaluation Benchmark for Road Detection Algorithms. ITSC, 2013 + diff --git a/modules/datasets/doc/datasets/slam_tumindoor.rst b/modules/datasets/doc/datasets/slam_tumindoor.rst index 07c7767c172..1b72a873c2c 100644 --- a/modules/datasets/doc/datasets/slam_tumindoor.rst +++ b/modules/datasets/doc/datasets/slam_tumindoor.rst @@ -14,3 +14,7 @@ _`"TUMindoor Dataset"`: http://www.navvis.lmt.ei.tum.de/dataset/ 3. 
To load each dataset run: ./opencv/build/bin/example_datasets_slam_tumindoor -p=/home/user/path_to_unpacked_folders/ +**References:** + +.. [TUMindoor] R. Huitl and G. Schroth and S. Hilsenbeck and F. Schweiger and E. Steinbach. {TUM}indoor: An Extensive Image and Point Cloud Dataset for Visual Indoor Localization and Mapping. 2012 + diff --git a/modules/datasets/doc/datasets/tr_chars.rst b/modules/datasets/doc/datasets/tr_chars.rst index eb321d08a02..041db074afc 100644 --- a/modules/datasets/doc/datasets/tr_chars.rst +++ b/modules/datasets/doc/datasets/tr_chars.rst @@ -16,3 +16,7 @@ _`"The Chars74K Dataset"`: http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/ 4. To load data, for example "EnglishImg", run: ./opencv/build/bin/example_datasets_tr_chars -p=/home/user/path_to_unpacked_folder/English/ +**References:** + +.. [Campos09] T. E. de Campos, B. R. Babu and M. Varma. Character recognition in natural images. In Proceedings of the International Conference on Computer Vision Theory and Applications (VISAPP), 2009 + diff --git a/modules/datasets/doc/datasets/tr_svt.rst b/modules/datasets/doc/datasets/tr_svt.rst index f5aeb86437c..348a433f5ee 100644 --- a/modules/datasets/doc/datasets/tr_svt.rst +++ b/modules/datasets/doc/datasets/tr_svt.rst @@ -14,3 +14,20 @@ _`"The Street View Text Dataset"`: http://vision.ucsd.edu/~kai/svt/ 3. To load data run: ./opencv/build/bin/example_datasets_tr_svt -p=/home/user/path_to_unpacked_folder/svt/svt1/ +Benchmark +""""""""" + +For this dataset was implemented benchmark with accuracy (mean f1): 0.217 + +To run benchmark execute: + +.. code-block:: bash + + ./opencv/build/bin/example_datasets_tr_svt_benchmark -p=/home/user/path_to_unpacked_folders/svt/svt1/ + +**References:** + +.. [Wang11] Kai Wang, Boris Babenko and Serge Belongie. End-to-end Scene Text Recognition. ICCV, 2011 + +.. [Wang10] Kai Wang and Serge Belongie. Word Spotting in the Wild. ECCV, 2010 + diff --git a/modules/datasets/include/opencv2/datasets/fr_adience.hpp b/modules/datasets/include/opencv2/datasets/fr_adience.hpp new file mode 100644 index 00000000000..7d58b095894 --- /dev/null +++ b/modules/datasets/include/opencv2/datasets/fr_adience.hpp @@ -0,0 +1,93 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_DATASETS_FR_ADIENCE_HPP +#define OPENCV_DATASETS_FR_ADIENCE_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +enum genderType +{ + male = 0, + female, + none +}; + +struct FR_adienceObj : public Object +{ + std::string user_id; + std::string original_image; + int face_id; + std::string age; + genderType gender; + int x; + int y; + int dx; + int dy; + int tilt_ang; + int fiducial_yaw_angle; + int fiducial_score; +}; + +class CV_EXPORTS FR_adience : public Dataset +{ +public: + virtual void load(const std::string &path) = 0; + + static Ptr create(); + + std::vector paths; +}; + +} +} + +#endif diff --git a/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp b/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp new file mode 100644 index 00000000000..6a093f0b5f0 --- /dev/null +++ b/modules/datasets/include/opencv2/datasets/hpe_humaneva.hpp @@ -0,0 +1,85 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_DATASETS_HPE_HUMANEVA_HPP +#define OPENCV_DATASETS_HPE_HUMANEVA_HPP + +#include +#include + +#include "opencv2/datasets/dataset.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +struct HPE_humanevaObj : public Object +{ + char person; // 1..4 + std::string action; + int type1; + std::string type2; + Matx13d ofs; + std::string fileName; + std::vector imageNames; // for HumanEva_II +}; + +enum datasetType +{ + humaneva_1 = 1, + humaneva_2 +}; + +class CV_EXPORTS HPE_humaneva : public Dataset +{ +public: + virtual void load(const std::string &path) = 0; + + static Ptr create(int num=humaneva_1); +}; + +} +} + +#endif diff --git a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp index 2c6b7446567..56564adbaab 100644 --- a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp +++ b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp @@ -56,9 +56,8 @@ namespace datasets struct OR_imagenetObj : public Object { - std::string wnid; - int id2; - std::string imageUrl; + int id; + std::string image; }; class CV_EXPORTS OR_imagenet : public Dataset diff --git a/modules/datasets/include/opencv2/datasets/or_sun.hpp b/modules/datasets/include/opencv2/datasets/or_sun.hpp index be438d4dee9..09db30b8ee4 100644 --- a/modules/datasets/include/opencv2/datasets/or_sun.hpp +++ b/modules/datasets/include/opencv2/datasets/or_sun.hpp @@ -56,8 +56,8 @@ namespace datasets struct OR_sunObj : public Object { + int label; std::string name; - std::vector imageNames; }; class CV_EXPORTS OR_sun : public Dataset @@ -66,6 +66,8 @@ class CV_EXPORTS OR_sun : public Dataset virtual void load(const std::string &path) = 0; static Ptr create(); + + std::vector paths; }; } diff --git a/modules/datasets/include/opencv2/datasets/util.hpp b/modules/datasets/include/opencv2/datasets/util.hpp index 9832e74067a..b1520b0754d 100644 --- a/modules/datasets/include/opencv2/datasets/util.hpp +++ b/modules/datasets/include/opencv2/datasets/util.hpp @@ -45,6 +45,11 @@ #include #include +#include +#include // atoi, atof + +#include + #include namespace cv diff --git a/modules/datasets/samples/fr_adience.cpp b/modules/datasets/samples/fr_adience.cpp new file mode 100644 index 00000000000..ece94f1c3a4 --- /dev/null +++ b/modules/datasets/samples/fr_adience.cpp @@ -0,0 +1,105 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/datasets/fr_adience.hpp" + +#include + +#include + +#include +#include + +using namespace std; +using namespace cv; +using namespace cv::datasets; + +int main(int argc, char *argv[]) +{ + const char *keys = + "{ help h usage ? | | show this message }" + "{ path p |true| path to dataset folder and splits }"; + CommandLineParser parser(argc, argv, keys); + string path(parser.get("path")); + if (parser.has("help") || path=="true") + { + parser.printMessage(); + return -1; + } + + Ptr dataset = FR_adience::create(); + dataset->load(path); + + // *************** + // dataset contains for each object its images. + // For example, let output splits number, dataset size and last image. 
+ int numSplits = dataset->getNumSplits(); + printf("splits number: %u\n", numSplits); + printf("dataset size: %u\n", (unsigned int)dataset->getTrain().size()); + + FR_adienceObj *example = static_cast(dataset->getTrain().back().get()); + printf("last image:\n"); + printf("user_id: %s\n", example->user_id.c_str()); + printf("original_image: %s\n", example->original_image.c_str()); + printf("face_id: %u\n", example->face_id); + printf("age: %s\n", example->age.c_str()); + printf("gender: "); + if (example->gender == male) + { + printf("m\n"); + } else + if (example->gender == female) + { + printf("f\n"); + } else + { + printf("none\n"); + } + printf("x: %u\n", example->x); + printf("y: %u\n", example->y); + printf("dx: %u\n", example->dx); + printf("dy: %u\n", example->dy); + printf("tilt_ang: %u\n", example->tilt_ang); + printf("fiducial_yaw_angle: %u\n", example->fiducial_yaw_angle); + printf("fiducial_score: %u\n", example->fiducial_score); + + return 0; +} diff --git a/modules/datasets/samples/hpe_humaneva.cpp b/modules/datasets/samples/hpe_humaneva.cpp new file mode 100644 index 00000000000..45a9802b863 --- /dev/null +++ b/modules/datasets/samples/hpe_humaneva.cpp @@ -0,0 +1,100 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/datasets/hpe_humaneva.hpp" + +#include + +#include +#include // atoi + +#include +#include + +using namespace std; +using namespace cv; +using namespace cv::datasets; + +int main(int argc, char *argv[]) +{ + const char *keys = + "{ help h usage ? 
| | show this message }" + "{ path p |true| path to dataset folder }"; + CommandLineParser parser(argc, argv, keys); + string path(parser.get("path")); + if (parser.has("help") || path=="true") + { + parser.printMessage(); + return -1; + } + + for (unsigned int i=1; i<=2; ++i) + { + printf("\tHumanEva %u\n", i); + char number[2]; + sprintf(number, "%u", i); + string pathCurr(path+"HumanEva_"+number+"/"); + + Ptr dataset = HPE_humaneva::create(i); + dataset->load(pathCurr); + + // *************** + // dataset contains pair of rgb\dep images + // For example, let output train size and last element. + HPE_humanevaObj *example = static_cast(dataset->getTrain().back().get()); + printf("train size: %u\n", (unsigned int)dataset->getTrain().size()); + printf("last train video:\n"); + printf("person: %u\n", example->person); + printf("action: %s\n", example->action.c_str()); + printf("type1: %u\n", example->type1); + printf("type2: %s\n", example->type2.c_str()); + printf("filename: %s\n", example->fileName.c_str()); + printf("num images: %u\n", (int)example->imageNames.size()); + printf("ofs:"); + for (int j=0; j<3; ++j) + { + printf(" %f", example->ofs(0, j)); + } + printf("\n"); + } + + return 0; +} diff --git a/modules/datasets/samples/or_imagenet.cpp b/modules/datasets/samples/or_imagenet.cpp index 13b9d8b01e1..4fb61d9f174 100644 --- a/modules/datasets/samples/or_imagenet.cpp +++ b/modules/datasets/samples/or_imagenet.cpp @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { const char *keys = "{ help h usage ? | | show this message }" - "{ path p |true| path to file with urls: fall11_urls.txt }"; + "{ path p |true| path to folder with dataset }"; CommandLineParser parser(argc, argv, keys); string path(parser.get("path")); if (parser.has("help") || path=="true") @@ -71,13 +71,28 @@ int main(int argc, char *argv[]) dataset->load(path); // *************** - // dataset contains for each object its id & image url. - // For example, let output dataset size and first object. - printf("dataset size: %u\n", (unsigned int)dataset->getTrain().size()); - OR_imagenetObj *example = static_cast(dataset->getTrain()[0].get()); - printf("first object url: %s\n", example->imageUrl.c_str()); - printf("first object wnid: %s\n", example->wnid.c_str()); - printf("first object id2: %u\n", example->id2); + // dataset contains for each object its id & image path + // For example, let output train\test\validation size and first image. 
+ vector< Ptr > &curr = dataset->getTrain(); + printf("train:\nsize: %u\n", (unsigned int)curr.size()); + OR_imagenetObj *example = static_cast(curr[0].get()); + printf("first image:\n"); + printf("image: %s\n", example->image.c_str()); + printf("id: %u\n", example->id); + + vector< Ptr > &currT = dataset->getTest(); + printf("test:\nsize: %u\n", (unsigned int)currT.size()); + example = static_cast(currT[0].get()); + printf("first image:\n"); + printf("image: %s\n", example->image.c_str()); + printf("id: %u\n", example->id); + + vector< Ptr > &currV = dataset->getValidation(); + printf("validation:\nsize: %u\n", (unsigned int)currV.size()); + example = static_cast(currV[0].get()); + printf("first image:\n"); + printf("image: %s\n", example->image.c_str()); + printf("id: %u\n", example->id); return 0; } diff --git a/modules/datasets/samples/or_sun.cpp b/modules/datasets/samples/or_sun.cpp index 047b9796af2..0750777e52e 100644 --- a/modules/datasets/samples/or_sun.cpp +++ b/modules/datasets/samples/or_sun.cpp @@ -70,16 +70,15 @@ int main(int argc, char *argv[]) // *************** // dataset contains for each object its images. - // For example, let output dataset size and last object. + // For example, let output splits number, dataset size and last image. + int numSplits = dataset->getNumSplits(); + printf("splits number: %u\n", numSplits); printf("dataset size: %u\n", (unsigned int)dataset->getTrain().size()); + OR_sunObj *example = static_cast(dataset->getTrain().back().get()); - printf("last object name: %s\n", example->name.c_str()); - printf("last object images number: %u\n", (unsigned int)example->imageNames.size()); - vector &imageNames = example->imageNames; - for (vector::iterator it=imageNames.begin(); it!=imageNames.end(); ++it) - { - printf("%s\n", (*it).c_str()); - } + printf("last image:\nname: %s\n", example->name.c_str()); + printf("label: %u\n", example->label); + printf("label path: %s\n", dataset->paths[example->label].c_str()); return 0; } diff --git a/modules/datasets/samples/tr_svt_benchmark.cpp b/modules/datasets/samples/tr_svt_benchmark.cpp new file mode 100644 index 00000000000..be2d7429509 --- /dev/null +++ b/modules/datasets/samples/tr_svt_benchmark.cpp @@ -0,0 +1,303 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/datasets/tr_svt.hpp" + +#include + +#include "opencv2/text.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/imgcodecs.hpp" + +#include +#include // atoi + +#include + +#include +#include + +using namespace std; +using namespace cv; +using namespace cv::datasets; +using namespace cv::text; + +//Calculate edit distance between two words +size_t edit_distance(const string& A, const string& B); +size_t min(size_t x, size_t y, size_t z); +bool isRepetitive(const string& s); +bool sort_by_lenght(const string &a, const string &b); +//Draw ER's in an image via floodFill +void er_draw(vector &channels, vector > ®ions, vector group, Mat& segmentation); + +size_t min(size_t x, size_t y, size_t z) +{ + return x < y ? min(x,z) : min(y,z); +} + +size_t edit_distance(const string& A, const string& B) +{ + size_t NA = A.size(); + size_t NB = B.size(); + + vector< vector > M(NA + 1, vector(NB + 1)); + + for (size_t a = 0; a <= NA; ++a) + M[a][0] = a; + + for (size_t b = 0; b <= NB; ++b) + M[0][b] = b; + + for (size_t a = 1; a <= NA; ++a) + for (size_t b = 1; b <= NB; ++b) + { + size_t x = M[a-1][b] + 1; + size_t y = M[a][b-1] + 1; + size_t z = M[a-1][b-1] + (A[a-1] == B[b-1] ? 0 : 1); + M[a][b] = min(x,y,z); + } + + return M[A.size()][B.size()]; +} + +bool sort_by_lenght(const string &a, const string &b){return (a.size()>b.size());} + +bool isRepetitive(const string& s) +{ + int count = 0; + for (int i=0; i<(int)s.size(); i++) + { + if ((s[i] == 'i') || + (s[i] == 'l') || + (s[i] == 'I')) + count++; + } + if (count > ((int)s.size()+1)/2) + { + return true; + } + return false; +} + + +void er_draw(vector &channels, vector > ®ions, vector group, Mat& segmentation) +{ + for (int r=0; r<(int)group.size(); r++) + { + ERStat er = regions[group[r][0]][group[r][1]]; + if (er.parent != NULL) // deprecate the root region + { + int newMaskVal = 255; + int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; + floodFill(channels[group[r][0]],segmentation,Point(er.pixel%channels[group[r][0]].cols,er.pixel/channels[group[r][0]].cols), + Scalar(255),0,Scalar(er.level),Scalar(0),flags); + } + } +} + +int main(int argc, char *argv[]) +{ + const char *keys = + "{ help h usage ? 
| | show this message }" + "{ path p |true| path to dataset xml files }"; + CommandLineParser parser(argc, argv, keys); + string path(parser.get("path")); + if (parser.has("help") || path=="true") + { + parser.printMessage(); + return -1; + } + + // loading train & test images description + Ptr dataset = TR_svt::create(); + dataset->load(path); + + + vector f1Each; + + unsigned int correctNum = 0; + unsigned int returnedNum = 0; + unsigned int returnedCorrectNum = 0; + + vector< Ptr >& test = dataset->getTest(); + unsigned int num = 0; + for (vector< Ptr >::iterator itT=test.begin(); itT!=test.end(); ++itT) + { + TR_svtObj *example = static_cast((*itT).get()); + + num++; + printf("processed image: %u, name: %s\n", num, example->fileName.c_str()); + + correctNum += example->tags.size(); +/* printf("\ntags:\n"); + for (vector::iterator it=example->tags.begin(); it!=example->tags.end(); ++it) + { + tag &t = (*it); + printf("%s\nx: %u, y: %u, width: %u, height: %u\n", + t.value.c_str(), t.x, t.y, t.x+t.width, t.y+t.height); + }*/ + unsigned int correctNumEach = example->tags.size(); + unsigned int returnedNumEach = 0; + unsigned int returnedCorrectNumEach = 0; + + Mat image = imread((path+example->fileName).c_str()); + /*Text Detection*/ + + // Extract channels to be processed individually + vector channels; + + Mat grey; + cvtColor(image,grey,COLOR_RGB2GRAY); + + // Notice here we are only using grey channel, see textdetection.cpp for example with more channels + channels.push_back(grey); + channels.push_back(255-grey); + + // Create ERFilter objects with the 1st and 2nd stage default classifiers + Ptr er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00015f,0.13f,0.2f,true,0.1f); + Ptr er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5); + + vector > regions(channels.size()); + // Apply the default cascade classifier to each independent channel (could be done in parallel) + for (int c=0; c<(int)channels.size(); c++) + { + er_filter1->run(channels[c], regions[c]); + er_filter2->run(channels[c], regions[c]); + } + + // Detect character groups + vector< vector > nm_region_groups; + vector nm_boxes; + erGrouping(image, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_HORIZ); + + + /*Text Recognition (OCR)*/ + + Ptr ocr = OCRTesseract::create(); + for (int i=0; i<(int)nm_boxes.size(); i++) + { + Mat group_img = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1); + er_draw(channels, regions, nm_region_groups[i], group_img); + group_img(nm_boxes[i]).copyTo(group_img); + copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0)); + + string output; + vector boxes; + vector words; + vector confidences; + ocr->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD); + + output.erase(remove(output.begin(), output.end(), '\n'), output.end()); + //cout << "OCR output = \"" << output << "\" lenght = " << output.size() << endl; + if (output.size() < 3) + continue; + + for (int j=0; j<(int)boxes.size(); j++) + { + boxes[j].x += nm_boxes[i].x-15; + boxes[j].y += nm_boxes[i].y-15; + + //cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl; + if ((words[j].size() < 2) || (confidences[j] < 51) || + ((words[j].size()==2) && (words[j][0] == words[j][1])) || + ((words[j].size()< 4) && (confidences[j] < 60)) || + isRepetitive(words[j])) + { + continue; + } + + if (find(example->lex.begin(), example->lex.end(), words[j]) == example->lex.end()) + { + continue; + } + + returnedNum++; + 
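+                    // Note (annotation, not in the original benchmark): returnedNum/returnedNumEach
+                    // count OCR words that survive the filters and appear in the image lexicon, while
+                    // returnedCorrectNum/returnedCorrectNumEach below additionally require the word to
+                    // match a ground-truth tag and overlap its box. Per image, precision is
+                    // returnedCorrectNumEach/returnedNumEach, recall is returnedCorrectNumEach/correctNumEach,
+                    // and the resulting F1 scores are averaged over all images at the end.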
returnedNumEach++; + /*printf("%s\nx: %u, y: %u, width: %u, height: %u\n", + words[j].c_str(), boxes[j].tl().x, boxes[j].tl().y, boxes[j].br().x, boxes[j].br().y);*/ + for (vector::iterator it=example->tags.begin(); it!=example->tags.end(); ++it) + { + tag &t = (*it); + if (t.value==words[j] && + !(boxes[j].tl().x > t.x+t.width || boxes[j].br().x < t.x || + boxes[j].tl().y > t.y+t.height || boxes[j].br().y < t.y)) + { + returnedCorrectNum++; + returnedCorrectNumEach++; + break; + } + } + } + } + double p = 0.0; + if (0 != returnedNumEach) + { + p = 1.0*returnedCorrectNumEach/returnedNumEach; + } + double r = 0.0; + if (0 != correctNumEach) + { + r = 1.0*returnedCorrectNumEach/correctNumEach; + } + double f1 = 0.0; + if (0 != p+r) + { + f1 = 2*(p*r)/(p+r); + } + //printf("|%f|\n", f1); + f1Each.push_back(f1); + } + + /*double p = 1.0*returnedCorrectNum/returnedNum; + double r = 1.0*returnedCorrectNum/correctNum; + double f1 = 2*(p*r)/(p+r); + printf("f1: %f\n", f1);*/ + + double f1 = 0.0; + for (vector::iterator it=f1Each.begin(); it!=f1Each.end(); ++it) + { + f1 += *it; + } + f1 /= f1Each.size(); + printf("mean f1: %f\n", f1); + + return 0; +} diff --git a/modules/datasets/src/ar_hmdb.cpp b/modules/datasets/src/ar_hmdb.cpp index 6baa28c05fd..23ee4f088e6 100644 --- a/modules/datasets/src/ar_hmdb.cpp +++ b/modules/datasets/src/ar_hmdb.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/ar_hmdb.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS AR_hmdbImp : public AR_hmdb +class AR_hmdbImp : public AR_hmdb { public: AR_hmdbImp() {} diff --git a/modules/datasets/src/ar_sports.cpp b/modules/datasets/src/ar_sports.cpp index 83cf79c6b26..11c8062b611 100644 --- a/modules/datasets/src/ar_sports.cpp +++ b/modules/datasets/src/ar_sports.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/ar_sports.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS AR_sportsImp : public AR_sports +class AR_sportsImp : public AR_sports { public: AR_sportsImp() {} diff --git a/modules/datasets/src/dataset.cpp b/modules/datasets/src/dataset.cpp index 38b74167c77..5c8406b9ca9 100644 --- a/modules/datasets/src/dataset.cpp +++ b/modules/datasets/src/dataset.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/dataset.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { diff --git a/modules/datasets/src/fr_adience.cpp b/modules/datasets/src/fr_adience.cpp new file mode 100644 index 00000000000..9c077ca37b5 --- /dev/null +++ b/modules/datasets/src/fr_adience.cpp @@ -0,0 +1,174 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/datasets/fr_adience.hpp" +#include "opencv2/datasets/util.hpp" + +namespace cv +{ +namespace datasets +{ + +using namespace std; + +class FR_adienceImp : public FR_adience +{ +public: + FR_adienceImp() {} + //FR_adienceImp(const string &path); + virtual ~FR_adienceImp() {} + + virtual void load(const string &path); + +private: + void loadDataset(const string &path); + + void loadFile(const string &filename, vector< Ptr > &out); + void cv5ToSplits(vector< Ptr > fileList[5]); +}; + +/*FR_adienceImp::FR_adienceImp(const string &path) +{ + loadDataset(path); +}*/ + +void FR_adienceImp::load(const string &path) +{ + loadDataset(path); +} + +void FR_adienceImp::loadFile(const string &filename, vector< Ptr > &out) +{ + string line; + ifstream infile(filename.c_str()); + while (getline(infile, line)) + { + Ptr curr(new FR_adienceObj); + + vector elems; + split(line, elems, ','); + + curr->user_id = elems[0]; + curr->original_image = elems[1]; + curr->face_id = atoi(elems[2].c_str()); + curr->age = elems[3]; + if (elems[4]=="m") + { + curr->gender = male; + } else + if (elems[4]=="f") + { + curr->gender = female; + } else + { + curr->gender = none; + } + curr->x = atoi(elems[5].c_str()); + curr->y = atoi(elems[6].c_str()); + curr->dx = atoi(elems[7].c_str()); + curr->dy = atoi(elems[8].c_str()); + curr->tilt_ang = atoi(elems[9].c_str()); + curr->fiducial_yaw_angle = atoi(elems[10].c_str()); + curr->fiducial_score = atoi(elems[11].c_str()); + + out.push_back(curr); + } +} + +void FR_adienceImp::cv5ToSplits(vector< Ptr > fileList[5]) +{ + for (unsigned int i=0; i<5; ++i) + { + train.push_back(vector< Ptr >()); + test.push_back(vector< Ptr >()); + validation.push_back(vector< Ptr >()); + for (unsigned int j=0; j<5; ++j) + { + vector< Ptr > &currlist = fileList[j]; + if (i!=j) + { + for (vector< Ptr >::iterator it=currlist.begin(); it!=currlist.end(); ++it) + { + train.back().push_back(*it); + } + } else + { + for (vector< Ptr >::iterator it=currlist.begin(); it!=currlist.end(); ++it) + { + test.back().push_back(*it); + } + 
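+                // Note (annotation, not part of the original code): fold j == i becomes the test
+                // split while the other four folds were appended to train above, so the five input
+                // file lists yield five standard cross-validation splits (validation stays empty).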
} + } + } +} + +void FR_adienceImp::loadDataset(const string &path) +{ + vector< Ptr > fileList[5]; + for (unsigned int i=0; i<5; ++i) + { + char tmp[3]; + sprintf(tmp, "%u", i); + string filename(path+"fold_"+string(tmp)+"_data.txt"); + + loadFile(filename, fileList[i]); + } + cv5ToSplits(fileList); + + for (unsigned int i=0; i<5; ++i) + { + char tmp[3]; + sprintf(tmp, "%u", i); + string filename(path+"fold_frontal_"+string(tmp)+"_data.txt"); + + fileList[i].clear(); + loadFile(filename, fileList[i]); + } + cv5ToSplits(fileList); +} + +Ptr FR_adience::create() +{ + return Ptr(new FR_adienceImp); +} + +} +} diff --git a/modules/datasets/src/fr_lfw.cpp b/modules/datasets/src/fr_lfw.cpp index 16e0133c7b3..3b768d9dd1e 100644 --- a/modules/datasets/src/fr_lfw.cpp +++ b/modules/datasets/src/fr_lfw.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/fr_lfw.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" #include @@ -52,7 +51,7 @@ namespace datasets using namespace std; -class CV_EXPORTS FR_lfwImp : public FR_lfw +class FR_lfwImp : public FR_lfw { public: FR_lfwImp() {} diff --git a/modules/datasets/src/gr_chalearn.cpp b/modules/datasets/src/gr_chalearn.cpp index 7cbed3ca9d8..a77f22608b9 100644 --- a/modules/datasets/src/gr_chalearn.cpp +++ b/modules/datasets/src/gr_chalearn.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/gr_chalearn.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS GR_chalearnImp : public GR_chalearn +class GR_chalearnImp : public GR_chalearn { public: GR_chalearnImp() {} diff --git a/modules/datasets/src/gr_skig.cpp b/modules/datasets/src/gr_skig.cpp index 954ed509ae6..3bdfd71eda4 100644 --- a/modules/datasets/src/gr_skig.cpp +++ b/modules/datasets/src/gr_skig.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/gr_skig.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" #include @@ -52,7 +51,7 @@ namespace datasets using namespace std; -class CV_EXPORTS GR_skigImp : public GR_skig +class GR_skigImp : public GR_skig { public: GR_skigImp() {} diff --git a/modules/datasets/src/hpe_humaneva.cpp b/modules/datasets/src/hpe_humaneva.cpp new file mode 100644 index 00000000000..0dfc1a06d6f --- /dev/null +++ b/modules/datasets/src/hpe_humaneva.cpp @@ -0,0 +1,232 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Itseez Inc or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/datasets/hpe_humaneva.hpp" +#include "opencv2/datasets/util.hpp" + +#include + +namespace cv +{ +namespace datasets +{ + +using namespace std; + +class HPE_humanevaImp : public HPE_humaneva +{ +public: + HPE_humanevaImp() {} + //HPE_humanevaImp(const string &path); + virtual ~HPE_humanevaImp() {} + + virtual void load(const string &path); + +private: + void loadDataset(const string &path); +}; + +/*HPE_humanevaImp::HPE_humanevaImp(const string &path) +{ + loadDataset(path); +}*/ + +void HPE_humanevaImp::load(const string &path) +{ + loadDataset(path); +} + +void HPE_humanevaImp::loadDataset(const string &path) +{ + train.push_back(vector< Ptr >()); + test.push_back(vector< Ptr >()); + validation.push_back(vector< Ptr >()); + + for (unsigned int i=1; i<=4; ++i) + { + char number[2]; + sprintf(number, "%u", i); + string pathDatasetI(path + "S" + number + "/Image_Data/"); + string pathDatasetS(path + "S" + number + "/Sync_Data/"); + + vector fileNames; + getDirList(pathDatasetI, fileNames); + for (vector::iterator it=fileNames.begin(); it!=fileNames.end(); ++it) + { + string &file = *it; + + vector elems; + split(file, elems, '_'); + if (elems.size() != 3) + { + continue; + } + + Ptr curr(new HPE_humanevaObj); + curr->person = (char)i; + curr->action = elems[0]; + curr->type1 = atoi(elems[1].c_str()); + curr->fileName = pathDatasetI+file; + + unsigned int type2End = 2; + if (elems[2][type2End+1] != ')') + { + type2End = 3; + } + curr->type2 = elems[2].substr(1, type2End); + + file = file.substr(0, file.length()-3) + "ofs"; + ifstream infileOFS((pathDatasetS + file).c_str()); + string line; + unsigned int j = 0; + while (getline(infileOFS, line)) + { + curr->ofs(0, j) = atof(line.c_str()); + ++j; + } + + train.back().push_back(curr); + } + } +} + +// +// HumanEva II +// +class HPE_humanevaImpII : public HPE_humaneva +{ +public: + HPE_humanevaImpII() {} + //HPE_humanevaImpII(const string &path); + virtual ~HPE_humanevaImpII() {} + + virtual void load(const string &path); + +private: + void loadDataset(const string &path); +}; + +/*HPE_humanevaImpII::HPE_humanevaImpII(const string &path) +{ + loadDataset(path); +}*/ + +void HPE_humanevaImpII::load(const string &path) +{ + loadDataset(path); +} + +void HPE_humanevaImpII::loadDataset(const string &path) +{ + train.push_back(vector< Ptr >()); + test.push_back(vector< Ptr >()); + validation.push_back(vector< Ptr >()); + + for (unsigned int i=1; i<=2; ++i) + { + char number[2]; + sprintf(number, "%u", i*2); // 2 & 4 + string pathDatasetI(path + "S" + number + "/Image_Data/"); + 
string pathDatasetS(path + "S" + number + "/Sync_Data/"); + + vector fileNames; + getDirList(pathDatasetI, fileNames); + for (vector::iterator it=fileNames.begin(); it!=fileNames.end(); ++it) + { + string &file = *it; + + vector elems; + split(file, elems, '_'); + if (elems.size() != 3) + { + continue; + } + + Ptr curr(new HPE_humanevaObj); + curr->person = (char)i; + curr->action = elems[0]; + curr->type1 = atoi(elems[1].c_str()); + curr->fileName = pathDatasetI+file; + + unsigned int type2End = 2; + if (elems[2][type2End+1] != ')') + { + type2End = 3; + } + curr->type2 = elems[2].substr(1, type2End); + + vector imageNames; + getDirList(curr->fileName, imageNames); + for (vector::iterator itI=imageNames.begin(); itI!=imageNames.end(); ++itI) + { + string &image = *itI; + if (image.substr(image.length()-3) == "png") + { + curr->imageNames.push_back(image); + } + } + + file = file.substr(0, file.length()) + ".ofs"; + ifstream infileOFS((pathDatasetS + file).c_str()); + string line; + unsigned int j = 0; + while (getline(infileOFS, line)) + { + curr->ofs(0, j) = atof(line.c_str()); + ++j; + } + + train.back().push_back(curr); + } + } +} + +Ptr HPE_humaneva::create(int num) +{ + if (humaneva_2==num) + { + return Ptr(new HPE_humanevaImpII); + } + return Ptr(new HPE_humanevaImp); +} + +} +} diff --git a/modules/datasets/src/hpe_parse.cpp b/modules/datasets/src/hpe_parse.cpp index 2c37a96e060..270f5b7e85e 100644 --- a/modules/datasets/src/hpe_parse.cpp +++ b/modules/datasets/src/hpe_parse.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/hpe_parse.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS HPE_parseImp : public HPE_parse +class HPE_parseImp : public HPE_parse { public: HPE_parseImp() {} diff --git a/modules/datasets/src/ir_affine.cpp b/modules/datasets/src/ir_affine.cpp index b8946106ca1..185ded18092 100644 --- a/modules/datasets/src/ir_affine.cpp +++ b/modules/datasets/src/ir_affine.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/ir_affine.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS IR_affineImp : public IR_affine +class IR_affineImp : public IR_affine { public: IR_affineImp() {} diff --git a/modules/datasets/src/ir_robot.cpp b/modules/datasets/src/ir_robot.cpp index 338633b4613..d48aa81348f 100644 --- a/modules/datasets/src/ir_robot.cpp +++ b/modules/datasets/src/ir_robot.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/ir_robot.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS IR_robotImp : public IR_robot +class IR_robotImp : public IR_robot { public: IR_robotImp() {} diff --git a/modules/datasets/src/is_bsds.cpp b/modules/datasets/src/is_bsds.cpp index b99315bc0b0..b4ff37796d2 100644 --- a/modules/datasets/src/is_bsds.cpp +++ b/modules/datasets/src/is_bsds.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/is_bsds.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS IS_bsdsImp : public IS_bsds +class IS_bsdsImp : public IS_bsds { public: IS_bsdsImp() {} diff --git a/modules/datasets/src/is_weizmann.cpp b/modules/datasets/src/is_weizmann.cpp index fc75ae742a1..8d09b946803 100644 --- a/modules/datasets/src/is_weizmann.cpp +++ b/modules/datasets/src/is_weizmann.cpp 
@@ -41,7 +41,6 @@ #include "opencv2/datasets/is_weizmann.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS IS_weizmannImp : public IS_weizmann +class IS_weizmannImp : public IS_weizmann { public: IS_weizmannImp() {} diff --git a/modules/datasets/src/msm_epfl.cpp b/modules/datasets/src/msm_epfl.cpp index 663adedcbdc..36e5b529400 100644 --- a/modules/datasets/src/msm_epfl.cpp +++ b/modules/datasets/src/msm_epfl.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/msm_epfl.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS MSM_epflImp : public MSM_epfl +class MSM_epflImp : public MSM_epfl { public: MSM_epflImp() {} diff --git a/modules/datasets/src/msm_middlebury.cpp b/modules/datasets/src/msm_middlebury.cpp index a7d520d3fd9..6f6eb52dd93 100644 --- a/modules/datasets/src/msm_middlebury.cpp +++ b/modules/datasets/src/msm_middlebury.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/msm_middlebury.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS MSM_middleburyImp : public MSM_middlebury +class MSM_middleburyImp : public MSM_middlebury { public: MSM_middleburyImp() {} diff --git a/modules/datasets/src/or_imagenet.cpp b/modules/datasets/src/or_imagenet.cpp index 0fbebf976bf..05687b851ac 100644 --- a/modules/datasets/src/or_imagenet.cpp +++ b/modules/datasets/src/or_imagenet.cpp @@ -41,7 +41,8 @@ #include "opencv2/datasets/or_imagenet.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" + +#include namespace cv { @@ -50,7 +51,7 @@ namespace datasets using namespace std; -class CV_EXPORTS OR_imagenetImp : public OR_imagenet +class OR_imagenetImp : public OR_imagenet { public: OR_imagenetImp() {} @@ -61,6 +62,8 @@ class CV_EXPORTS OR_imagenetImp : public OR_imagenet private: void loadDataset(const string &path); + + void numberToString(int number, string &out); }; /*OR_imagenetImp::OR_imagenetImp(const string &path) @@ -73,30 +76,87 @@ void OR_imagenetImp::load(const string &path) loadDataset(path); } +void OR_imagenetImp::numberToString(int number, string &out) +{ + char numberStr[9]; + sprintf(numberStr, "%u", number); + for (unsigned int i=0; i<8-strlen(numberStr); ++i) + { + out += "0"; + } + out += numberStr; +} + void OR_imagenetImp::loadDataset(const string &path) { train.push_back(vector< Ptr >()); test.push_back(vector< Ptr >()); validation.push_back(vector< Ptr >()); - ifstream infile((path + "fall11_urls.txt").c_str()); + map labels; + ifstream infile((path + "labels.txt").c_str()); string line; while (getline(infile, line)) { vector elems; - split(line, elems, '\t'); + split(line, elems, ','); + string syn = elems[0]; + int number = atoi(elems[1].c_str()); + + labels.insert(make_pair(syn, number)); + } + string pathTrain(path + "train/"); + vector fileNames; + getDirList(pathTrain, fileNames); + for (vector::iterator it=fileNames.begin(); it!=fileNames.end(); ++it) + { + string pathSyn((*it) + "/"); + vector fileNamesSyn; + getDirList((pathTrain + pathSyn), fileNamesSyn); + for (vector::iterator itSyn=fileNamesSyn.begin(); itSyn!=fileNamesSyn.end(); ++itSyn) + { + Ptr curr(new OR_imagenetObj); + curr->image = "train/" + pathSyn + *itSyn; + curr->id = labels[*it]; + + train.back().push_back(curr); + } + } + + ifstream infileVal((path + 
"ILSVRC2010_validation_ground_truth.txt").c_str()); + while (getline(infileVal, line)) + { Ptr curr(new OR_imagenetObj); - curr->imageUrl = elems[1]; + curr->id = atoi(line.c_str()); + numberToString(validation.back().size()+1, curr->image); + curr->image = "val/ILSVRC2010_val_" + curr->image + ".JPEG"; - string id(elems[0]); - elems.clear(); - split(id, elems, '_'); + validation.back().push_back(curr); + } + + vector testGT; + ifstream infileTest((path + "ILSVRC2010_test_ground_truth.txt").c_str()); + while (getline(infileTest, line)) + { + testGT.push_back(atoi(line.c_str())); + } + if (testGT.size()==0) // have no test labels, set them to 1000 - unknown + { + for (int i=0; i<150000; ++i) + { + testGT.push_back(1000); // unknown + } + } - curr->wnid = elems[0]; - curr->id2 = atoi(elems[1].c_str()); + for (vector::iterator it=testGT.begin(); it!=testGT.end(); ++it) + { + Ptr curr(new OR_imagenetObj); + curr->id = *it; + numberToString(test.back().size()+1, curr->image); + curr->image = "test/ILSVRC2010_test_" + curr->image + ".JPEG"; - train.back().push_back(curr); + test.back().push_back(curr); } } diff --git a/modules/datasets/src/or_mnist.cpp b/modules/datasets/src/or_mnist.cpp index cc721ee158e..e85266c1e69 100644 --- a/modules/datasets/src/or_mnist.cpp +++ b/modules/datasets/src/or_mnist.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/or_mnist.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS OR_mnistImp : public OR_mnist +class OR_mnistImp : public OR_mnist { public: OR_mnistImp() {} diff --git a/modules/datasets/src/or_sun.cpp b/modules/datasets/src/or_sun.cpp index 29b710922e9..305eb2177dd 100644 --- a/modules/datasets/src/or_sun.cpp +++ b/modules/datasets/src/or_sun.cpp @@ -41,7 +41,8 @@ #include "opencv2/datasets/or_sun.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" + +#include namespace cv { @@ -50,7 +51,7 @@ namespace datasets using namespace std; -class CV_EXPORTS OR_sunImp : public OR_sun +class OR_sunImp : public OR_sun { public: OR_sunImp() {} @@ -61,6 +62,10 @@ class CV_EXPORTS OR_sunImp : public OR_sun private: void loadDataset(const string &path); + + void loadDatasetPart(const string &path, vector< Ptr > &dataset_); + + map pathLabel; }; /*OR_sunImp::OR_sunImp(const string &path) @@ -73,13 +78,40 @@ void OR_sunImp::load(const string &path) loadDataset(path); } -void OR_sunImp::loadDataset(const string &path) +void OR_sunImp::loadDatasetPart(const string &path, vector< Ptr > &dataset_) { - train.push_back(vector< Ptr >()); - test.push_back(vector< Ptr >()); - validation.push_back(vector< Ptr >()); + string line; + ifstream infile(path.c_str()); + while (getline(infile, line)) + { + Ptr curr(new OR_sunObj); + curr->label = 397; + curr->name = line; + + size_t pos = curr->name.rfind('/'); + if (pos != string::npos) + { + string labelStr(curr->name.substr(0, pos+1)); + map::iterator it = pathLabel.find(labelStr); + if (it != pathLabel.end()) + { + curr->label = (*it).second; + } else + { + curr->label = pathLabel.size(); + pathLabel.insert(make_pair(labelStr, curr->label)); + paths.push_back(labelStr); + } + curr->name = curr->name.substr(pos+1); + } - string classNameFile(path + "ClassName.txt"); + dataset_.push_back(curr); + } +} + +void OR_sunImp::loadDataset(const string &path) +{ + /*string classNameFile(path + "ClassName.txt"); ifstream infile(classNameFile.c_str()); string line; while (getline(infile, line)) @@ -96,6 +128,29 @@ void 
OR_sunImp::loadDataset(const string &path) } train.back().push_back(curr); + }*/ + + for (unsigned int i=1; i<=10; ++i) + { + char tmp[3]; + sprintf(tmp, "%u", i); + string numStr; + if (i<10) + { + numStr = string("0") + string(tmp); + } else + { + numStr = tmp; + } + string trainFile(path + "Partitions/Training_" + numStr + ".txt"); + string testFile(path + "Partitions/Testing_" + numStr + ".txt"); + + train.push_back(vector< Ptr >()); + test.push_back(vector< Ptr >()); + validation.push_back(vector< Ptr >()); + + loadDatasetPart(trainFile, train.back()); + loadDatasetPart(testFile, test.back()); } } diff --git a/modules/datasets/src/pd_caltech.cpp b/modules/datasets/src/pd_caltech.cpp index 74c596d442e..b3660b8d636 100644 --- a/modules/datasets/src/pd_caltech.cpp +++ b/modules/datasets/src/pd_caltech.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/pd_caltech.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS PD_caltechImp : public PD_caltech +class PD_caltechImp : public PD_caltech { public: PD_caltechImp() {} diff --git a/modules/datasets/src/slam_kitti.cpp b/modules/datasets/src/slam_kitti.cpp index ca1904d3ece..e1d82555c51 100644 --- a/modules/datasets/src/slam_kitti.cpp +++ b/modules/datasets/src/slam_kitti.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/slam_kitti.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS SLAM_kittiImp : public SLAM_kitti +class SLAM_kittiImp : public SLAM_kitti { public: SLAM_kittiImp() {} diff --git a/modules/datasets/src/slam_tumindoor.cpp b/modules/datasets/src/slam_tumindoor.cpp index cdc35431461..8db891658b2 100644 --- a/modules/datasets/src/slam_tumindoor.cpp +++ b/modules/datasets/src/slam_tumindoor.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/slam_tumindoor.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" #include @@ -52,7 +51,7 @@ namespace datasets using namespace std; -class CV_EXPORTS SLAM_tumindoorImp : public SLAM_tumindoor +class SLAM_tumindoorImp : public SLAM_tumindoor { public: SLAM_tumindoorImp() {} diff --git a/modules/datasets/src/tr_chars.cpp b/modules/datasets/src/tr_chars.cpp index 5291c5bc82f..261bf3a6c21 100644 --- a/modules/datasets/src/tr_chars.cpp +++ b/modules/datasets/src/tr_chars.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/tr_chars.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" namespace cv { @@ -50,7 +49,7 @@ namespace datasets using namespace std; -class CV_EXPORTS TR_charsImp : public TR_chars +class TR_charsImp : public TR_chars { public: TR_charsImp() {} diff --git a/modules/datasets/src/tr_svt.cpp b/modules/datasets/src/tr_svt.cpp index b2e11e788f9..af853d47969 100644 --- a/modules/datasets/src/tr_svt.cpp +++ b/modules/datasets/src/tr_svt.cpp @@ -41,7 +41,6 @@ #include "opencv2/datasets/tr_svt.hpp" #include "opencv2/datasets/util.hpp" -#include "precomp.hpp" #include @@ -53,7 +52,7 @@ namespace datasets using namespace std; using namespace tinyxml2; -class CV_EXPORTS TR_svtImp : public TR_svt +class TR_svtImp : public TR_svt { public: TR_svtImp() {} diff --git a/modules/datasetstools/README.md b/modules/datasetstools/README.md new file mode 100644 index 00000000000..e3c1339c8e5 --- /dev/null +++ b/modules/datasetstools/README.md @@ -0,0 +1,2 @@ +Tools for working with different datasets +========================================= \ No newline at end of file diff 
--git a/modules/face/README.md b/modules/face/README.md new file mode 100644 index 00000000000..f8abffc9280 --- /dev/null +++ b/modules/face/README.md @@ -0,0 +1,2 @@ +Recently added face recognition software +======================================== \ No newline at end of file diff --git a/modules/latentsvm/CMakeLists.txt b/modules/latentsvm/CMakeLists.txt new file mode 100644 index 00000000000..99f4c952c00 --- /dev/null +++ b/modules/latentsvm/CMakeLists.txt @@ -0,0 +1,2 @@ +set(the_description "Object Detection") +ocv_define_module(latentsvm opencv_core opencv_imgproc opencv_objdetect opencv_ts OPTIONAL opencv_highgui) diff --git a/modules/latentsvm/doc/latent_svm_caskade.rst b/modules/latentsvm/doc/latent_svm_caskade.rst new file mode 100644 index 00000000000..643b4f6292a --- /dev/null +++ b/modules/latentsvm/doc/latent_svm_caskade.rst @@ -0,0 +1,276 @@ +Latent SVM +=============================================================== + +Discriminatively Trained Part Based Models for Object Detection +--------------------------------------------------------------- + +The object detector described below has been initially proposed by +P.F. Felzenszwalb in [Felzenszwalb2010a]_. It is based on a +Dalal-Triggs detector that uses a single filter on histogram of +oriented gradients (HOG) features to represent an object category. +This detector uses a sliding window approach, where a filter is +applied at all positions and scales of an image. The first +innovation is enriching the Dalal-Triggs model using a +star-structured part-based model defined by a "root" filter +(analogous to the Dalal-Triggs filter) plus a set of parts filters +and associated deformation models. The score of one of star models +at a particular position and scale within an image is the score of +the root filter at the given location plus the sum over parts of the +maximum, over placements of that part, of the part filter score on +its location minus a deformation cost easuring the deviation of the +part from its ideal location relative to the root. Both root and +part filter scores are defined by the dot product between a filter +(a set of weights) and a subwindow of a feature pyramid computed +from the input image. Another improvement is a representation of the +class of models by a mixture of star models. The score of a mixture +model at a particular position and scale is the maximum over +components, of the score of that component model at the given +location. + +The detector was dramatically speeded-up with cascade algorithm +proposed by P.F. Felzenszwalb in [Felzenszwalb2010b]_. The algorithm +prunes partial hypotheses using thresholds on their scores.The basic +idea of the algorithm is to use a hierarchy of models defined by an +ordering of the original model's parts. For a model with (n+1) +parts, including the root, a sequence of (n+1) models is obtained. +The i-th model in this sequence is defined by the first i parts from +the original model. Using this hierarchy, low scoring hypotheses can be +pruned after looking at the best configuration of a subset of the parts. +Hypotheses that score high under a weak model are evaluated further using +a richer model. + +In OpenCV there are C implementation of Latent SVM and C++ wrapper of it. +C version is the structure :ocv:struct:`CvObjectDetection` and a set of functions +working with this structure (see :ocv:func:`cvLoadLatentSvmDetector`, +:ocv:func:`cvReleaseLatentSvmDetector`, :ocv:func:`cvLatentSvmDetectObjects`). 
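+For orientation, a minimal sketch of this C-style flow (illustrative only, not taken
+from the library sources; see the samples referenced below for complete code)::
+
+    /* assumes a trained model file "model.xml" and an input image "image.png" */
+    IplImage* image = cvLoadImage("image.png");
+    CvMemStorage* storage = cvCreateMemStorage(0);
+    CvLatentSvmDetector* detector = cvLoadLatentSvmDetector("model.xml");
+    CvSeq* detections = cvLatentSvmDetectObjects(image, detector, storage, 0.5f, -1);
+    for (int i = 0; i < detections->total; i++)
+    {
+        CvObjectDetection det = *(CvObjectDetection*)cvGetSeqElem(detections, i);
+        /* det.rect is the object bounding box, det.score its confidence level */
+    }
+    cvReleaseLatentSvmDetector(&detector);
+    cvReleaseMemStorage(&storage);
+    cvReleaseImage(&image);
+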
+C++ version is the class :ocv:class:`LatentSvmDetector` and has slightly different +functionality in contrast with C version - it supports loading and detection +of several models. + +There are two examples of Latent SVM usage: ``samples/c/latentsvmdetect.cpp`` +and ``samples/cpp/latentsvm_multidetect.cpp``. + +.. highlight:: c + + +CvLSVMFilterPosition +-------------------- +.. ocv:struct:: CvLSVMFilterPosition + + Structure describes the position of the filter in the feature pyramid. + + .. ocv:member:: unsigned int l + + level in the feature pyramid + + .. ocv:member:: unsigned int x + + x-coordinate in level l + + .. ocv:member:: unsigned int y + + y-coordinate in level l + + +CvLSVMFilterObject +------------------ +.. ocv:struct:: CvLSVMFilterObject + + Description of the filter, which corresponds to the part of the object. + + .. ocv:member:: CvLSVMFilterPosition V + + ideal (penalty = 0) position of the partial filter + from the root filter position (V_i in the paper) + + .. ocv:member:: float fineFunction[4] + + vector describes penalty function (d_i in the paper) + pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2 + + .. ocv:member:: int sizeX + .. ocv:member:: int sizeY + + Rectangular map (sizeX x sizeY), + every cell stores feature vector (dimension = p) + + .. ocv:member:: int numFeatures + + number of features + + .. ocv:member:: float *H + + matrix of feature vectors to set and get + feature vectors (i,j) used formula H[(j * sizeX + i) * p + k], + where k - component of feature vector in cell (i, j) + +CvLatentSvmDetector +------------------- +.. ocv:struct:: CvLatentSvmDetector + + Structure contains internal representation of trained Latent SVM detector. + + .. ocv:member:: int num_filters + + total number of filters (root plus part) in model + + .. ocv:member:: int num_components + + number of components in model + + .. ocv:member:: int* num_part_filters + + array containing number of part filters for each component + + .. ocv:member:: CvLSVMFilterObject** filters + + root and part filters for all model components + + .. ocv:member:: float* b + + biases for all model components + + .. ocv:member:: float score_threshold + + confidence level threshold + + +CvObjectDetection +----------------- +.. ocv:struct:: CvObjectDetection + + Structure contains the bounding box and confidence level for detected object. + + .. ocv:member:: CvRect rect + + bounding box for a detected object + + .. ocv:member:: float score + + confidence level + + +cvLoadLatentSvmDetector +----------------------- +Loads trained detector from a file. + +.. ocv:function:: CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename) + + :param filename: Name of the file containing the description of a trained detector + + +cvReleaseLatentSvmDetector +-------------------------- +Release memory allocated for CvLatentSvmDetector structure. + +.. ocv:function:: void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector) + + :param detector: CvLatentSvmDetector structure to be released + + +cvLatentSvmDetectObjects +------------------------ +Find rectangular regions in the given image that are likely to contain objects +and corresponding confidence levels. + +.. 
ocv:function:: CvSeq* cvLatentSvmDetectObjects( IplImage* image, CvLatentSvmDetector* detector, CvMemStorage* storage, float overlap_threshold=0.5f, int numThreads=-1 ) + + :param image: image + :param detector: LatentSVM detector in internal representation + :param storage: Memory storage to store the resultant sequence of the object candidate rectangles + :param overlap_threshold: Threshold for the non-maximum suppression algorithm + :param numThreads: Number of threads used in parallel version of the algorithm + +.. highlight:: cpp + +LatentSvmDetector +----------------- +.. ocv:class:: LatentSvmDetector + +This is a C++ wrapping class of Latent SVM. It contains internal representation of several +trained Latent SVM detectors (models) and a set of methods to load the detectors and detect objects +using them. + +LatentSvmDetector::ObjectDetection +---------------------------------- +.. ocv:struct:: LatentSvmDetector::ObjectDetection + + Structure contains the detection information. + + .. ocv:member:: Rect rect + + bounding box for a detected object + + .. ocv:member:: float score + + confidence level + + .. ocv:member:: int classID + + class (model or detector) ID that detect an object + + +LatentSvmDetector::LatentSvmDetector +------------------------------------ +Two types of constructors. + +.. ocv:function:: LatentSvmDetector::LatentSvmDetector() + +.. ocv:function:: LatentSvmDetector::LatentSvmDetector(const vector& filenames, const vector& classNames=vector()) + + + + :param filenames: A set of filenames storing the trained detectors (models). Each file contains one model. See examples of such files here /opencv_extra/testdata/cv/latentsvmdetector/models_VOC2007/. + + :param classNames: A set of trained models names. If it's empty then the name of each model will be constructed from the name of file containing the model. E.g. the model stored in "/home/user/cat.xml" will get the name "cat". + +LatentSvmDetector::~LatentSvmDetector +------------------------------------- +Destructor. + +.. ocv:function:: LatentSvmDetector::~LatentSvmDetector() + +LatentSvmDetector::~clear +------------------------- +Clear all trained models and their names stored in an class object. + +.. ocv:function:: void LatentSvmDetector::clear() + +LatentSvmDetector::load +----------------------- +Load the trained models from given ``.xml`` files and return ``true`` if at least one model was loaded. + +.. ocv:function:: bool LatentSvmDetector::load( const vector& filenames, const vector& classNames=vector() ) + + :param filenames: A set of filenames storing the trained detectors (models). Each file contains one model. See examples of such files here /opencv_extra/testdata/cv/latentsvmdetector/models_VOC2007/. + + :param classNames: A set of trained models names. If it's empty then the name of each model will be constructed from the name of file containing the model. E.g. the model stored in "/home/user/cat.xml" will get the name "cat". + +LatentSvmDetector::detect +------------------------- +Find rectangular regions in the given image that are likely to contain objects of loaded classes (models) +and corresponding confidence levels. + +.. ocv:function:: void LatentSvmDetector::detect( const Mat& image, vector& objectDetections, float overlapThreshold=0.5f, int numThreads=-1 ) + + :param image: An image. + :param objectDetections: The detections: rectangulars, scores and class IDs. + :param overlapThreshold: Threshold for the non-maximum suppression algorithm. 
+ :param numThreads: Number of threads used in parallel version of the algorithm. + +LatentSvmDetector::getClassNames +-------------------------------- +Return the class (model) names that were passed in constructor or method ``load`` or extracted from models filenames in those methods. + +.. ocv:function:: const vector& LatentSvmDetector::getClassNames() const + +LatentSvmDetector::getClassCount +-------------------------------- +Return a count of loaded models (classes). + +.. ocv:function:: size_t LatentSvmDetector::getClassCount() const + + +.. [Felzenszwalb2010a] Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. and Ramanan, D. *Object Detection with Discriminatively Trained Part Based Models*. PAMI, vol. 32, no. 9, pp. 1627-1645, September 2010 +.. [Felzenszwalb2010b] Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. *Cascade Object Detection with Deformable Part Models*. CVPR 2010, pp. 2241-2248 + diff --git a/modules/latentsvm/include/opencv2/latentsvm.hpp b/modules/latentsvm/include/opencv2/latentsvm.hpp new file mode 100644 index 00000000000..94f23bba59f --- /dev/null +++ b/modules/latentsvm/include/opencv2/latentsvm.hpp @@ -0,0 +1,86 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef __OPENCV_LATENTSVM_HPP__ +#define __OPENCV_LATENTSVM_HPP__ + +#include "opencv2/core.hpp" + +#include +#include +#include + +namespace cv +{ + +namespace lsvm +{ + +class CV_EXPORTS_W LSVMDetector +{ +public: + + struct CV_EXPORTS_W ObjectDetection + { + ObjectDetection(); + ObjectDetection( const Rect& rect, float score, int classID=-1 ); + Rect rect; + float score; + int classID; + }; + + virtual bool isEmpty() const = 0; + virtual void detect(cv::Mat const &image, CV_OUT std::vector &objects, + float overlapThreshold=0.5f ) = 0; + + virtual std::vector const& getClassNames() const = 0; + virtual size_t getClassCount() const = 0; + + static cv::Ptr create(std::vector const &filenames, + std::vector const &classNames = std::vector()); + + virtual ~LSVMDetector(){} +}; + +} // namespace lsvm +} // namespace cv + +#endif diff --git a/modules/latentsvm/perf/perf_cascadeclassifier.cpp b/modules/latentsvm/perf/perf_cascadeclassifier.cpp new file mode 100644 index 00000000000..2823302c77d --- /dev/null +++ b/modules/latentsvm/perf/perf_cascadeclassifier.cpp @@ -0,0 +1,50 @@ +#include "perf_precomp.hpp" +#include + +using namespace std; +using namespace cv; +using namespace perf; +using std::tr1::make_tuple; +using std::tr1::get; + +typedef std::tr1::tuple ImageName_MinSize_t; +typedef perf::TestBaseWithParam ImageName_MinSize; + +PERF_TEST_P(ImageName_MinSize, CascadeClassifierLBPFrontalFace, + testing::Combine(testing::Values( std::string("cv/shared/lena.png"), + std::string("cv/shared/1_itseez-0000289.png"), + std::string("cv/shared/1_itseez-0000492.png"), + std::string("cv/shared/1_itseez-0000573.png")), + testing::Values(24, 30, 40, 50, 60, 70, 80, 90) + ) + ) +{ + const string filename = get<0>(GetParam()); + int min_size = get<1>(GetParam()); + Size minSize(min_size, min_size); + + CascadeClassifier cc(getDataPath("cv/cascadeandhog/cascades/lbpcascade_frontalface.xml")); + if (cc.empty()) + FAIL() << "Can't load cascade file"; + + Mat img = imread(getDataPath(filename), 0); + if (img.empty()) + FAIL() << "Can't load source image"; + + vector faces; + + equalizeHist(img, img); + declare.in(img); + + while(next()) + { + faces.clear(); + + startTimer(); + cc.detectMultiScale(img, faces, 1.1, 3, 0, minSize); + stopTimer(); + } + + std::sort(faces.begin(), faces.end(), comparators::RectLess()); + SANITY_CHECK(faces, 3.001 * faces.size()); +} \ No newline at end of file diff --git a/modules/latentsvm/perf/perf_main.cpp b/modules/latentsvm/perf/perf_main.cpp new file mode 100644 index 00000000000..69b8ecac5b8 --- /dev/null +++ b/modules/latentsvm/perf/perf_main.cpp @@ -0,0 +1,3 @@ +#include "perf_precomp.hpp" + +CV_PERF_TEST_MAIN(objdetect) diff --git a/modules/latentsvm/perf/perf_precomp.cpp b/modules/latentsvm/perf/perf_precomp.cpp new file mode 100644 index 00000000000..8552ac3d428 --- /dev/null +++ b/modules/latentsvm/perf/perf_precomp.cpp @@ -0,0 +1 @@ +#include "perf_precomp.hpp" diff --git a/modules/latentsvm/perf/perf_precomp.hpp b/modules/latentsvm/perf/perf_precomp.hpp new file mode 100644 index 00000000000..29427f7c767 --- /dev/null +++ b/modules/latentsvm/perf/perf_precomp.hpp @@ -0,0 +1,20 @@ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/objdetect.hpp" 
+#include "opencv2/highgui.hpp" + +#ifdef GTEST_CREATE_SHARED_LIBRARY +#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined +#endif + +#endif diff --git a/modules/latentsvm/src/_lsvmc_error.h b/modules/latentsvm/src/_lsvmc_error.h new file mode 100644 index 00000000000..489d4cdd963 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_error.h @@ -0,0 +1,61 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef LSVM_ERROR +#define LSVM_ERROR + +#define LATENT_SVM_OK 0 +#define LATENT_SVM_MEM_NULL 2 +#define DISTANCE_TRANSFORM_OK 1 +#define DISTANCE_TRANSFORM_GET_INTERSECTION_ERROR -1 +#define DISTANCE_TRANSFORM_ERROR -2 +#define DISTANCE_TRANSFORM_EQUAL_POINTS -3 +#define LATENT_SVM_GET_FEATURE_PYRAMID_FAILED -4 +#define LATENT_SVM_SEARCH_OBJECT_FAILED -5 +#define LATENT_SVM_FAILED_SUPERPOSITION -6 +#define FILTER_OUT_OF_BOUNDARIES -7 +#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8 +#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9 +#define FFT_OK 2 +#define FFT_ERROR -10 +#define LSVM_PARSER_FILE_NOT_FOUND -11 + +#endif diff --git a/modules/latentsvm/src/_lsvmc_function.h b/modules/latentsvm/src/_lsvmc_function.h new file mode 100644 index 00000000000..5fa1f2bc785 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_function.h @@ -0,0 +1,58 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef FUNCTION_SC +#define FUNCTION_SC + +#include "_lsvmc_types.h" + +namespace cv +{ +namespace lsvm +{ + +float calcM (int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter); +float calcM_PCA (int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter); +float calcM_PCA_cash(int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter, float * cashM, int * maskM, int step); +float calcFine (const CvLSVMFilterObjectCaskade *filter, int di, int dj); +} +} +#endif \ No newline at end of file diff --git a/modules/latentsvm/src/_lsvmc_latentsvm.h b/modules/latentsvm/src/_lsvmc_latentsvm.h new file mode 100644 index 00000000000..61029567e62 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_latentsvm.h @@ -0,0 +1,379 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/*****************************************************************************/ +/* Latent SVM prediction API */ +/*****************************************************************************/ + +#ifndef _LATENTSVM_H_ +#define _LATENTSVM_H_ + +#include +#include "_lsvmc_types.h" +#include "_lsvmc_error.h" +#include "_lsvmc_routine.h" + +namespace cv +{ +namespace lsvm +{ + +////////////////////////////////////////////////////////////// +// Building feature pyramid +// (pyramid constructed both contrast and non-contrast image) +////////////////////////////////////////////////////////////// + +void FeaturePyramid32(CvLSVMFeaturePyramidCaskade* H, int maxX, int maxY); + +/* +// Creation PSA feature pyramid +// +// API +// featurePyramid* createPSA_FeaturePyramid(featurePyramid* H); + +// INPUT +// H - feature pyramid +// OUTPUT +// RESULT +// PSA feature pyramid +*/ +CvLSVMFeaturePyramidCaskade* createPCA_FeaturePyramid(CvLSVMFeaturePyramidCaskade* H, + CvLatentSvmDetectorCaskade* detector, + int maxX, int maxY); + +/* +// Getting feature pyramid +// +// API +// int getFeaturePyramid(IplImage * image, const CvLSVMFilterObjectCaskade **all_F, + const int n_f, + const int lambda, const int k, + const int startX, const int startY, + const int W, const int H, featurePyramid **maps); +// INPUT +// image - image +// lambda - resize scale +// k - size of cells +// startX - X coordinate of the image rectangle to search +// startY - Y coordinate of the image rectangle to search +// W - width of the image rectangle to search +// H - height of the image rectangle to search +// OUTPUT +// maps - feature maps for all levels +// RESULT +// Error status +*/ +int getFeaturePyramid(IplImage * image, CvLSVMFeaturePyramidCaskade **maps); + +/* +// Getting feature map for the selected subimage +// +// API +// int getFeatureMaps(const IplImage * image, const int k, featureMap **map); +// INPUT +// image - selected subimage +// k - size of cells +// OUTPUT +// map - feature map +// RESULT +// Error status +*/ +int getFeatureMaps(const IplImage * 
image, const int k, CvLSVMFeatureMapCaskade **map); + + +/* +// Feature map Normalization and Truncation +// +// API +// int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa); +// INPUT +// map - feature map +// alfa - truncation threshold +// OUTPUT +// map - truncated and normalized feature map +// RESULT +// Error status +*/ +int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa); + +/* +// Feature map reduction +// In each cell we reduce dimension of the feature vector +// according to original paper special procedure +// +// API +// int PCAFeatureMaps(featureMap *map) +// INPUT +// map - feature map +// OUTPUT +// map - feature map +// RESULT +// Error status +*/ +int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map); + +////////////////////////////////////////////////////////////// +// search object +////////////////////////////////////////////////////////////// + +/* +// Transformation filter displacement from the block space +// to the space of pixels at the initial image +// +// API +// int convertPoints(int countLevel, int lambda, + int initialImageLevel, + CvPoint *points, int *levels, + CvPoint **partsDisplacement, int kPoints, int n, + int maxXBorder, + int maxYBorder); +// INPUT +// countLevel - the number of levels in the feature pyramid +// lambda - method parameter +// initialImageLevel - level of feature pyramid that contains feature map + for initial image +// points - the set of root filter positions (in the block space) +// levels - the set of levels +// partsDisplacement - displacement of part filters (in the block space) +// kPoints - number of root filter positions +// n - number of part filters +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// points - the set of root filter positions (in the space of pixels) +// partsDisplacement - displacement of part filters (in the space of pixels) +// RESULT +// Error status +*/ +int convertPoints(int countLevel, int lambda, + int initialImageLevel, + CvPoint *points, int *levels, + CvPoint **partsDisplacement, int kPoints, int n, + int maxXBorder, + int maxYBorder); + +/* +// Elimination boxes that are outside the image boudaries +// +// API +// int clippingBoxes(int width, int height, + CvPoint *points, int kPoints); +// INPUT +// width - image wediht +// height - image heigth +// points - a set of points (coordinates of top left or + bottom right corners) +// kPoints - points number +// OUTPUT +// points - updated points (if coordinates less than zero then + set zero coordinate, if coordinates more than image + size then set coordinates equal image size) +// RESULT +// Error status +*/ +int clippingBoxes(int width, int height, + CvPoint *points, int kPoints); + +/* +// Creation feature pyramid with nullable border +// +// API +// featurePyramid* createFeaturePyramidWithBorder(const IplImage *image, + int maxXBorder, int maxYBorder); + +// INPUT +// image - initial image +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// RESULT +// Feature pyramid with nullable border +*/ +CvLSVMFeaturePyramidCaskade* createFeaturePyramidWithBorder(IplImage *image, + int maxXBorder, int maxYBorder); + +/* +// Computation root filters displacement and values of score function +// +// API +// int searchObjectThresholdSomeComponents(const featurePyramid *H, + const CvLSVMFilterObjectCaskade **filters, + int kComponents, const int 
*kPartFilters, + const float *b, float scoreThreshold, + CvPoint **points, CvPoint **oppPoints, + float **score, int *kPoints); +// INPUT +// H - feature pyramid +// filters - filters (root filter then it's part filters, etc.) +// kComponents - root filters number +// kPartFilters - array of part filters number for each component +// b - array of linear terms +// scoreThreshold - score threshold +// OUTPUT +// points - root filters displacement (top left corners) +// oppPoints - root filters displacement (bottom right corners) +// score - array of score values +// kPoints - number of boxes +// RESULT +// Error status +*/ +int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramidCaskade *H, + const CvLSVMFeaturePyramidCaskade *H_PCA, + const CvLSVMFilterObjectCaskade **filters, + int kComponents, const int *kPartFilters, + const float *b, float scoreThreshold, + CvPoint **points, CvPoint **oppPoints, + float **score, int *kPoints); + +/* +// Compute opposite point for filter box +// +// API +// int getOppositePoint(CvPoint point, + int sizeX, int sizeY, + float step, int degree, + CvPoint *oppositePoint); + +// INPUT +// point - coordinates of filter top left corner + (in the space of pixels) +// (sizeX, sizeY) - filter dimension in the block space +// step - scaling factor +// degree - degree of the scaling factor +// OUTPUT +// oppositePoint - coordinates of filter bottom corner + (in the space of pixels) +// RESULT +// Error status +*/ +int getOppositePoint(CvPoint point, + int sizeX, int sizeY, + float step, int degree, + CvPoint *oppositePoint); + +/* +// Drawing root filter boxes +// +// API +// int showRootFilterBoxes(const IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); +// INPUT +// image - initial image +// filter - root filter object +// points - a set of points +// levels - levels of feature pyramid +// kPoints - number of points +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// window contained initial image and filter boxes +// RESULT +// Error status +*/ +int showRootFilterBoxes(IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); + +/* +// Drawing part filter boxes +// +// API +// int showPartFilterBoxes(const IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); +// INPUT +// image - initial image +// filters - a set of part filters +// n - number of part filters +// partsDisplacement - a set of points +// levels - levels of feature pyramid +// kPoints - number of foot filter positions +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// window contained initial image and filter boxes +// RESULT +// Error status +*/ +int showPartFilterBoxes(IplImage *image, + const CvLSVMFilterObjectCaskade **filters, + int n, CvPoint **partsDisplacement, + int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); + +/* +// Drawing boxes +// +// API +// int showBoxes(const IplImage *img, + const CvPoint *points, const CvPoint *oppositePoints, int kPoints, + CvScalar color, int thickness, int line_type, int shift); +// INPUT +// img - initial image +// points - top 
left corner coordinates +// oppositePoints - right bottom corner coordinates +// kPoints - points number +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// RESULT +// Error status +*/ +int showBoxes(IplImage *img, + const CvPoint *points, const CvPoint *oppositePoints, int kPoints, + CvScalar color, int thickness, int line_type, int shift); +} +} +#endif diff --git a/modules/latentsvm/src/_lsvmc_matching.h b/modules/latentsvm/src/_lsvmc_matching.h new file mode 100644 index 00000000000..3175d6047b4 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_matching.h @@ -0,0 +1,130 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
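The drawing helpers declared above (`showRootFilterBoxes`, `showPartFilterBoxes`, `showBoxes`) take boxes as pairs of opposite corners. A hypothetical usage sketch, assuming the module's internal header is on the include path; it only exercises the `showBoxes()` signature as declared:

```cpp
// Hypothetical usage sketch (assumes the latentsvm module is built and its
// internal header is available); draws kPoints detection boxes on img.
#include "_lsvmc_latentsvm.h"

static void drawDetections(IplImage *img,
                           CvPoint *points, CvPoint *oppositePoints, int kPoints)
{
    // red boxes, 2 px thick, 8-connected lines, no fractional-bit shift
    cv::lsvm::showBoxes(img, points, oppositePoints, kPoints,
                        cvScalar(0, 0, 255), 2, 8, 0);
}
```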
+// +//M*/ + +/*****************************************************************************/ +/* Matching procedure API */ +/*****************************************************************************/ +// +#ifndef _LSVM_MATCHING_H_ +#define _LSVM_MATCHING_H_ + +#include "_lsvmc_latentsvm.h" +#include "_lsvmc_error.h" +#include "_lsvmc_routine.h" + +namespace cv +{ +namespace lsvm +{ + + +/* +// Computation border size for feature map +// +// API +// int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by); +// INPUT +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// bx - border size (X-direction) +// by - border size (Y-direction) +// RESULT +// Error status +*/ +int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by); + +/* +// Addition nullable border to the feature map +// +// API +// int addNullableBorder(featureMap *map, int bx, int by); +// INPUT +// map - feature map +// bx - border size (X-direction) +// by - border size (Y-direction) +// OUTPUT +// RESULT +// Error status +*/ +int addNullableBorder(CvLSVMFeatureMapCaskade *map, int bx, int by); + +/* +// Perform non-maximum suppression algorithm (described in original paper) +// to remove "similar" bounding boxes +// +// API +// int nonMaximumSuppression(int numBoxes, const CvPoint *points, + const CvPoint *oppositePoints, const float *score, + float overlapThreshold, + int *numBoxesout, CvPoint **pointsOut, + CvPoint **oppositePointsOut, float **scoreOut); +// INPUT +// numBoxes - number of bounding boxes +// points - array of left top corner coordinates +// oppositePoints - array of right bottom corner coordinates +// score - array of detection scores +// overlapThreshold - threshold: bounding box is removed if overlap part + is greater than passed value +// OUTPUT +// numBoxesOut - the number of bounding boxes algorithm returns +// pointsOut - array of left top corner coordinates +// oppositePointsOut - array of right bottom corner coordinates +// scoreOut - array of detection scores +// RESULT +// Error status +*/ +int nonMaximumSuppression(int numBoxes, const CvPoint *points, + const CvPoint *oppositePoints, const float *score, + float overlapThreshold, + int *numBoxesOut, CvPoint **pointsOut, + CvPoint **oppositePointsOut, float **scoreOut); +int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, + const int *kPartFilters, + unsigned int *maxXBorder, unsigned int *maxYBorder); +//} + +int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, + const int *kPartFilters, + unsigned int *maxXBorder, unsigned int *maxYBorder); +} +} +#endif diff --git a/modules/latentsvm/src/_lsvmc_parser.h b/modules/latentsvm/src/_lsvmc_parser.h new file mode 100644 index 00000000000..eb9a7f52ac4 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_parser.h @@ -0,0 +1,128 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. 
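`_lsvmc_matching.h` above documents a greedy non-maximum suppression step that discards boxes overlapping a better-scoring detection. A standalone illustration of that idea follows; overlap is measured here as intersection over union, whereas the module's own normalization may differ:

```cpp
// Standalone illustration of greedy non-maximum suppression: keep the
// highest-scoring box, drop remaining boxes whose overlap with a kept box
// exceeds overlapThreshold, repeat.
#include <algorithm>
#include <vector>

struct Box { int x1, y1, x2, y2; float score; };

static float overlap(const Box &a, const Box &b)
{
    int w = std::min(a.x2, b.x2) - std::max(a.x1, b.x1);
    int h = std::min(a.y2, b.y2) - std::max(a.y1, b.y1);
    if (w <= 0 || h <= 0) return 0.f;
    float inter = (float)w * h;
    float areaA = (float)(a.x2 - a.x1) * (a.y2 - a.y1);
    float areaB = (float)(b.x2 - b.x1) * (b.y2 - b.y1);
    return inter / (areaA + areaB - inter);
}

static std::vector<Box> nms(std::vector<Box> boxes, float overlapThreshold)
{
    std::sort(boxes.begin(), boxes.end(),
              [](const Box &a, const Box &b) { return a.score > b.score; });
    std::vector<Box> kept;
    for (const Box &c : boxes)
    {
        bool suppressed = false;
        for (const Box &k : kept)
            if (overlap(c, k) > overlapThreshold) { suppressed = true; break; }
        if (!suppressed) kept.push_back(c);
    }
    return kept;
}
```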
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef LSVM_PARSER +#define LSVM_PARSER + +#include "_lsvmc_types.h" + +#define MODEL 1 +#define P 2 +#define COMP 3 +#define SCORE 4 +#define RFILTER 100 +#define PFILTERs 101 +#define PFILTER 200 +#define SIZEX 150 +#define SIZEY 151 +#define WEIGHTS 152 +#define TAGV 300 +#define Vx 350 +#define Vy 351 +#define TAGD 400 +#define Dx 451 +#define Dy 452 +#define Dxx 453 +#define Dyy 454 +#define BTAG 500 + +#define PCA 5 +#define WEIGHTSPCA 162 +#define CASCADE_Th 163 +#define HYPOTHES_PCA 164 +#define DEFORM_PCA 165 +#define HYPOTHES 166 +#define DEFORM 167 + +#define PCACOEFF 6 + +#define STEP_END 1000 + +#define EMODEL (STEP_END + MODEL) +#define EP (STEP_END + P) +#define ECOMP (STEP_END + COMP) +#define ESCORE (STEP_END + SCORE) +#define ERFILTER (STEP_END + RFILTER) +#define EPFILTERs (STEP_END + PFILTERs) +#define EPFILTER (STEP_END + PFILTER) +#define ESIZEX (STEP_END + SIZEX) +#define ESIZEY (STEP_END + SIZEY) +#define EWEIGHTS (STEP_END + WEIGHTS) +#define ETAGV (STEP_END + TAGV) +#define EVx (STEP_END + Vx) +#define EVy (STEP_END + Vy) +#define ETAGD (STEP_END + TAGD) +#define EDx (STEP_END + Dx) +#define EDy (STEP_END + Dy) +#define EDxx (STEP_END + Dxx) +#define EDyy (STEP_END + Dyy) +#define EBTAG (STEP_END + BTAG) + +#define EPCA (STEP_END + PCA) +#define EWEIGHTSPCA (STEP_END + WEIGHTSPCA) +#define ECASCADE_Th (STEP_END + CASCADE_Th) +#define EHYPOTHES_PCA (STEP_END + HYPOTHES_PCA) +#define EDEFORM_PCA (STEP_END + DEFORM_PCA) +#define EHYPOTHES (STEP_END + HYPOTHES) +#define EDEFORM (STEP_END + DEFORM) + +#define EPCACOEFF (STEP_END + PCACOEFF) + +namespace cv +{ +namespace lsvm +{ + + int loadModel( + // input parametr + const char *modelPath,// model path + + // output parametrs + CvLSVMFilterObjectCaskade ***filters, + int *kFilters, + int *kComponents, + int **kPartFilters, + float **b, + float *scoreThreshold, + float ** PCAcoeff); +} +} +#endif diff --git a/modules/latentsvm/src/_lsvmc_resizeimg.h b/modules/latentsvm/src/_lsvmc_resizeimg.h new file mode 100644 index 00000000000..8c03a5ad2a3 --- 
/dev/null +++ b/modules/latentsvm/src/_lsvmc_resizeimg.h @@ -0,0 +1,56 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef _LSVM_RESIZEIMG_H_ +#define _LSVM_RESIZEIMG_H_ + +#include "_lsvmc_types.h" + +namespace cv +{ +namespace lsvm +{ + +IplImage * resize_opencv (IplImage * img, float scale); +} +} + +#endif diff --git a/modules/latentsvm/src/_lsvmc_routine.h b/modules/latentsvm/src/_lsvmc_routine.h new file mode 100644 index 00000000000..1e251e4a011 --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_routine.h @@ -0,0 +1,76 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
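`_lsvmc_resizeimg.h` above exposes `resize_opencv(IplImage*, float scale)`, which the pyramid-building code calls with scales derived from the per-octave step. A standalone sketch of that arithmetic (constants taken from `_lsvmc_types.h`, image size is an example):

```cpp
// Standalone sketch of how the per-level resize factors fed to resize_opencv()
// are derived in getFeaturePyramid(): step = 2^(1/LAMBDA), and level i of a
// pyramid path is built from the image scaled by 1 / step^i.
#include <cmath>
#include <cstdio>

int main()
{
    const int   LAMBDA = 10;                          // levels per octave
    const float step   = std::pow(2.0f, 1.0f / LAMBDA);
    for (int i = 0; i < LAMBDA; ++i)
        std::printf("level %2d: scale = %.4f\n", i, 1.0f / std::pow(step, (float)i));
    return 0;
}
```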
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef _LSVM_ROUTINE_H_ +#define _LSVM_ROUTINE_H_ + +#include "_lsvmc_types.h" +#include "_lsvmc_error.h" + +namespace cv +{ +namespace lsvm +{ + + +////////////////////////////////////////////////////////////// +// Memory management routines +// All paramaters names correspond to previous data structures description +// All "alloc" functions return allocated memory for 1 object +// with all fields including arrays +// Error status is return value +////////////////////////////////////////////////////////////// +int allocFilterObject(CvLSVMFilterObjectCaskade **obj, const int sizeX, const int sizeY, + const int p); +int freeFilterObject (CvLSVMFilterObjectCaskade **obj); + +int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, const int sizeY, + const int p); +int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj); + +int allocFeaturePyramidObject(CvLSVMFeaturePyramidCaskade **obj, + const int countLevel); + +int freeFeaturePyramidObject (CvLSVMFeaturePyramidCaskade **obj); + +} +} +#endif diff --git a/modules/latentsvm/src/_lsvmc_types.h b/modules/latentsvm/src/_lsvmc_types.h new file mode 100644 index 00000000000..9c15ba0ae9f --- /dev/null +++ b/modules/latentsvm/src/_lsvmc_types.h @@ -0,0 +1,186 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
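`_lsvmc_routine.h` above pairs every `alloc*` routine with a matching `free*` routine, each taking a pointer-to-pointer so the object can be created and released in place. A hypothetical usage sketch, assuming the module's internal headers; the sizes below are examples:

```cpp
// Hypothetical usage sketch of the paired alloc/free routines declared above;
// every alloc* call is expected to be matched by the corresponding free* call.
#include "_lsvmc_routine.h"

static void featureMapLifetime()
{
    cv::lsvm::CvLSVMFeatureMapCaskade *map = 0;
    cv::lsvm::allocFeatureMapObject(&map, /*sizeX=*/16, /*sizeY=*/12, /*p=*/32);
    // ... fill and use map->map here ...
    cv::lsvm::freeFeatureMapObject(&map);
}
```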
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef SVM_TYPE +#define SVM_TYPE + +#include "float.h" + +#define PI CV_PI + +#define EPS 0.000001 + +#define F_MAX FLT_MAX +#define F_MIN -FLT_MAX + +// The number of elements in bin +// The number of sectors in gradient histogram building +#define NUM_SECTOR 9 + +// The number of levels in image resize procedure +// We need Lambda levels to resize image twice +#define LAMBDA 10 + +// Block size. Used in feature pyramid building procedure +#define SIDE_LENGTH 8 + +#define VAL_OF_TRUNCATE 0.2f +namespace cv +{ +namespace lsvm +{ +////////////////////////////////////////////////////////////// +// main data structures // +////////////////////////////////////////////////////////////// + +// data type: STRUCT CvObjectDetection +// structure contains the bounding box and confidence level for detected object +// rect - bounding box for a detected object +// score - confidence level + +typedef struct CvObjectDetection +{ + cv::Rect rect; + float score; +} CvObjectDetection; + + +// DataType: STRUCT featureMap +// FEATURE MAP DESCRIPTION +// Rectangular map (sizeX x sizeY), +// every cell stores feature vector (dimension = numFeatures) +// map - matrix of feature vectors +// to set and get feature vectors (i,j) +// used formula map[(j * sizeX + i) * p + k], where +// k - component of feature vector in cell (i, j) +typedef struct{ + int sizeX; + int sizeY; + int numFeatures; + float *map; +} CvLSVMFeatureMapCaskade; + +// DataType: STRUCT featurePyramid +// +// numLevels - number of levels in the feature pyramid +// pyramid - array of pointers to feature map at different levels +typedef struct{ + int numLevels; + CvLSVMFeatureMapCaskade **pyramid; +} CvLSVMFeaturePyramidCaskade; + +// DataType: STRUCT filterDisposition +// The structure stores preliminary results in optimization process +// with objective function D +// +// x - array with X coordinates of optimization problems solutions +// y - array with Y coordinates of optimization problems solutions +// score - array with optimal objective values +typedef struct{ + float *score; + int *x; + int *y; +} CvLSVMFilterDisposition; + +// DataType: STRUCT position +// Structure describes the position of the filter in the feature pyramid +// l - level in the feature pyramid +// (x, y) - coordinate in level l + +typedef struct CvLSVMFilterPosition +{ + int x; + int y; + int l; +} CvLSVMFilterPosition; + +// DataType: STRUCT filterObject +// Description of the filter, which corresponds to the part of the object +// V - ideal (penalty = 0) position of the partial filter +// from the root 
filter position (V_i in the paper) +// penaltyFunction - vector describes penalty function (d_i in the paper) +// pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2 +// FILTER DESCRIPTION +// Rectangular map (sizeX x sizeY), +// every cell stores feature vector (dimension = p) +// H - matrix of feature vectors +// to set and get feature vectors (i,j) +// used formula H[(j * sizeX + i) * p + k], where +// k - component of feature vector in cell (i, j) +// END OF FILTER DESCRIPTION + +typedef struct CvLSVMFilterObjectCaskade{ + CvLSVMFilterPosition V; + float fineFunction[4]; + int sizeX; + int sizeY; + int numFeatures; + float *H; + float *H_PCA; + float Hypothesis, Deformation; + float Hypothesis_PCA, Deformation_PCA; + int deltaX; + int deltaY; +} CvLSVMFilterObjectCaskade; + +// data type: STRUCT CvLatentSvmDetector +// structure contains internal representation of trained Latent SVM detector +// num_filters - total number of filters (root plus part) in model +// num_components - number of components in model +// num_part_filters - array containing number of part filters for each component +// filters - root and part filters for all model components +// b - biases for all model components +// score_threshold - confidence level threshold + +typedef struct CvLatentSvmDetectorCaskade +{ + int num_filters; + int num_components; + int* num_part_filters; + CvLSVMFilterObjectCaskade** filters; + float* b; + float score_threshold; + float *pca; + int pca_size; +} CvLatentSvmDetectorCaskade; +} +} +#endif diff --git a/modules/latentsvm/src/lsvmc_featurepyramid.cpp b/modules/latentsvm/src/lsvmc_featurepyramid.cpp new file mode 100644 index 00000000000..7dbfc1e5838 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_featurepyramid.cpp @@ -0,0 +1,624 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
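The feature-map and filter structures in `_lsvmc_types.h` above store cell data in a single flat array addressed as `map[(j * sizeX + i) * numFeatures + k]`. A standalone sketch of that addressing scheme (the struct and sizes here are illustrative, not the module's types):

```cpp
// Standalone sketch of the feature-map addressing documented in _lsvmc_types.h:
// component k of the feature vector stored in cell (i, j) lives at
// map[(j * sizeX + i) * numFeatures + k].
#include <vector>

struct FeatureMap
{
    int sizeX, sizeY, numFeatures;
    std::vector<float> map;              // sizeX * sizeY * numFeatures floats

    float &at(int i, int j, int k)       // cell (i, j), component k
    {
        return map[(j * sizeX + i) * numFeatures + k];
    }
};

// Example: FeatureMap m{4, 3, 31, std::vector<float>(4 * 3 * 31, 0.f)};
//          m.at(2, 1, 5) = 1.0f;
```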
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_latentsvm.h" +#include "_lsvmc_resizeimg.h" + +#ifdef HAVE_TBB +#include +#include "tbb/parallel_for.h" +#include "tbb/blocked_range.h" +#endif + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +namespace cv +{ +namespace lsvm +{ + +int getPathOfFeaturePyramid(IplImage * image, + float step, int numStep, int startIndex, + int sideLength, CvLSVMFeaturePyramidCaskade **maps); + +/* +// Getting feature map for the selected subimage +// +// API +// int getFeatureMaps(const IplImage * image, const int k, featureMap **map); +// INPUT +// image - selected subimage +// k - size of cells +// OUTPUT +// map - feature map +// RESULT +// Error status +*/ +int getFeatureMaps(const IplImage* image, const int k, CvLSVMFeatureMapCaskade **map) +{ + int sizeX, sizeY; + int p, px, stringSize; + int height, width, numChannels; + int i, j, kk, c, ii, jj, d; + float * datadx, * datady; + + int ch; + float magnitude, x, y, tx, ty; + + IplImage * dx, * dy; + int *nearest; + float *w, a_x, b_x; + + float kernel[3] = {-1.f, 0.f, 1.f}; + CvMat kernel_dx = cvMat(1, 3, CV_32F, kernel); + CvMat kernel_dy = cvMat(3, 1, CV_32F, kernel); + + float * r; + int * alfa; + + float boundary_x[NUM_SECTOR + 1]; + float boundary_y[NUM_SECTOR + 1]; + float max, dotProd; + int maxi; + + height = image->height; + width = image->width ; + + numChannels = image->nChannels; + + dx = cvCreateImage(cvSize(image->width, image->height), + IPL_DEPTH_32F, 3); + dy = cvCreateImage(cvSize(image->width, image->height), + IPL_DEPTH_32F, 3); + + sizeX = width / k; + sizeY = height / k; + px = 3 * NUM_SECTOR; + p = px; + stringSize = sizeX * p; + allocFeatureMapObject(map, sizeX, sizeY, p); + + cvFilter2D(image, dx, &kernel_dx, cvPoint(-1, 0)); + cvFilter2D(image, dy, &kernel_dy, cvPoint(0, -1)); + + float arg_vector; + for(i = 0; i <= NUM_SECTOR; i++) + { + arg_vector = ( (float) i ) * ( (float)(PI) / (float)(NUM_SECTOR) ); + boundary_x[i] = cosf(arg_vector); + boundary_y[i] = sinf(arg_vector); + }/*for(i = 0; i <= NUM_SECTOR; i++) */ + + r = (float *)malloc( sizeof(float) * (width * height)); + alfa = (int *)malloc( sizeof(int ) * (width * height * 2)); + + for(j = 1; j < height - 1; j++) + { + datadx = (float*)(dx->imageData + dx->widthStep * j); + datady = (float*)(dy->imageData + dy->widthStep * j); + for(i = 1; i < width - 1; i++) + { + c = 0; + x = (datadx[i * numChannels + c]); + y = (datady[i * numChannels + c]); + + r[j * width + i] =sqrtf(x * x + y * y); + for(ch = 1; ch < numChannels; ch++) + { + tx = (datadx[i * numChannels + ch]); + ty = (datady[i * numChannels + ch]); + magnitude = sqrtf(tx * tx + ty * ty); + if(magnitude > r[j * width + i]) + { + r[j * width + i] = magnitude; + c = ch; + x = tx; + y = ty; + } + }/*for(ch = 1; ch < numChannels; ch++)*/ + + max = boundary_x[0] * x + boundary_y[0] * y; + maxi = 0; + for (kk = 0; kk < NUM_SECTOR; kk++) + { + 
dotProd = boundary_x[kk] * x + boundary_y[kk] * y; + if (dotProd > max) + { + max = dotProd; + maxi = kk; + } + else + { + if (-dotProd > max) + { + max = -dotProd; + maxi = kk + NUM_SECTOR; + } + } + } + alfa[j * width * 2 + i * 2 ] = maxi % NUM_SECTOR; + alfa[j * width * 2 + i * 2 + 1] = maxi; + }/*for(i = 0; i < width; i++)*/ + }/*for(j = 0; j < height; j++)*/ + + nearest = (int *)malloc(sizeof(int ) * k); + w = (float*)malloc(sizeof(float) * (k * 2)); + + for(i = 0; i < k / 2; i++) + { + nearest[i] = -1; + }/*for(i = 0; i < k / 2; i++)*/ + for(i = k / 2; i < k; i++) + { + nearest[i] = 1; + }/*for(i = k / 2; i < k; i++)*/ + + for(j = 0; j < k / 2; j++) + { + b_x = k / 2 + j + 0.5f; + a_x = k / 2 - j - 0.5f; + w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x)); + w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x)); + }/*for(j = 0; j < k / 2; j++)*/ + for(j = k / 2; j < k; j++) + { + a_x = j - k / 2 + 0.5f; + b_x =-j + k / 2 - 0.5f + k; + w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x)); + w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x)); + }/*for(j = k / 2; j < k; j++)*/ + + for(i = 0; i < sizeY; i++) + { + for(j = 0; j < sizeX; j++) + { + for(ii = 0; ii < k; ii++) + { + for(jj = 0; jj < k; jj++) + { + if ((i * k + ii > 0) && + (i * k + ii < height - 1) && + (j * k + jj > 0) && + (j * k + jj < width - 1)) + { + d = (k * i + ii) * width + (j * k + jj); + (*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 ]] += + r[d] * w[ii * 2] * w[jj * 2]; + (*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += + r[d] * w[ii * 2] * w[jj * 2]; + if ((i + nearest[ii] >= 0) && + (i + nearest[ii] <= sizeY - 1)) + { + (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 ] ] += + r[d] * w[ii * 2 + 1] * w[jj * 2 ]; + (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += + r[d] * w[ii * 2 + 1] * w[jj * 2 ]; + } + if ((j + nearest[jj] >= 0) && + (j + nearest[jj] <= sizeX - 1)) + { + (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] += + r[d] * w[ii * 2] * w[jj * 2 + 1]; + (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += + r[d] * w[ii * 2] * w[jj * 2 + 1]; + } + if ((i + nearest[ii] >= 0) && + (i + nearest[ii] <= sizeY - 1) && + (j + nearest[jj] >= 0) && + (j + nearest[jj] <= sizeX - 1)) + { + (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] += + r[d] * w[ii * 2 + 1] * w[jj * 2 + 1]; + (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += + r[d] * w[ii * 2 + 1] * w[jj * 2 + 1]; + } + } + }/*for(jj = 0; jj < k; jj++)*/ + }/*for(ii = 0; ii < k; ii++)*/ + }/*for(j = 1; j < sizeX - 1; j++)*/ + }/*for(i = 1; i < sizeY - 1; i++)*/ + + cvReleaseImage(&dx); + cvReleaseImage(&dy); + + + free(w); + free(nearest); + + free(r); + free(alfa); + + return LATENT_SVM_OK; +} + +/* +// Feature map Normalization and Truncation +// +// API +// int normalizeAndTruncate(featureMap *map, const float alfa); +// INPUT +// map - feature map +// alfa - truncation threshold +// OUTPUT +// map - truncated and normalized feature map +// RESULT +// Error status +*/ +int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa) +{ + int i,j, ii; + int sizeX, sizeY, p, pos, pp, xp, pos1, pos2; + float * partOfNorm; // norm of C(i, j) + float * newData; + float valOfNorm; + + sizeX = 
map->sizeX; + sizeY = map->sizeY; + partOfNorm = (float *)malloc (sizeof(float) * (sizeX * sizeY)); + + p = NUM_SECTOR; + xp = NUM_SECTOR * 3; + pp = NUM_SECTOR * 12; + + for(i = 0; i < sizeX * sizeY; i++) + { + valOfNorm = 0.0f; + pos = i * map->numFeatures; + for(j = 0; j < p; j++) + { + valOfNorm += map->map[pos + j] * map->map[pos + j]; + }/*for(j = 0; j < p; j++)*/ + partOfNorm[i] = valOfNorm; + }/*for(i = 0; i < sizeX * sizeY; i++)*/ + + sizeX -= 2; + sizeY -= 2; + + newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp)); +//normalization + for(i = 1; i <= sizeY; i++) + { + for(j = 1; j <= sizeX; j++) + { + valOfNorm = sqrtf( + partOfNorm[(i )*(sizeX + 2) + (j )] + + partOfNorm[(i )*(sizeX + 2) + (j + 1)] + + partOfNorm[(i + 1)*(sizeX + 2) + (j )] + + partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON; + pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp; + pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp; + for(ii = 0; ii < p; ii++) + { + newData[pos2 + ii ] = map->map[pos1 + ii ] / valOfNorm; + }/*for(ii = 0; ii < p; ii++)*/ + for(ii = 0; ii < 2 * p; ii++) + { + newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm; + }/*for(ii = 0; ii < 2 * p; ii++)*/ + valOfNorm = sqrtf( + partOfNorm[(i )*(sizeX + 2) + (j )] + + partOfNorm[(i )*(sizeX + 2) + (j + 1)] + + partOfNorm[(i - 1)*(sizeX + 2) + (j )] + + partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON; + for(ii = 0; ii < p; ii++) + { + newData[pos2 + ii + p ] = map->map[pos1 + ii ] / valOfNorm; + }/*for(ii = 0; ii < p; ii++)*/ + for(ii = 0; ii < 2 * p; ii++) + { + newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm; + }/*for(ii = 0; ii < 2 * p; ii++)*/ + valOfNorm = sqrtf( + partOfNorm[(i )*(sizeX + 2) + (j )] + + partOfNorm[(i )*(sizeX + 2) + (j - 1)] + + partOfNorm[(i + 1)*(sizeX + 2) + (j )] + + partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON; + for(ii = 0; ii < p; ii++) + { + newData[pos2 + ii + p * 2] = map->map[pos1 + ii ] / valOfNorm; + }/*for(ii = 0; ii < p; ii++)*/ + for(ii = 0; ii < 2 * p; ii++) + { + newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm; + }/*for(ii = 0; ii < 2 * p; ii++)*/ + valOfNorm = sqrtf( + partOfNorm[(i )*(sizeX + 2) + (j )] + + partOfNorm[(i )*(sizeX + 2) + (j - 1)] + + partOfNorm[(i - 1)*(sizeX + 2) + (j )] + + partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON; + for(ii = 0; ii < p; ii++) + { + newData[pos2 + ii + p * 3 ] = map->map[pos1 + ii ] / valOfNorm; + }/*for(ii = 0; ii < p; ii++)*/ + for(ii = 0; ii < 2 * p; ii++) + { + newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm; + }/*for(ii = 0; ii < 2 * p; ii++)*/ + }/*for(j = 1; j <= sizeX; j++)*/ + }/*for(i = 1; i <= sizeY; i++)*/ +//truncation + for(i = 0; i < sizeX * sizeY * pp; i++) + { + if(newData [i] > alfa) newData [i] = alfa; + }/*for(i = 0; i < sizeX * sizeY * pp; i++)*/ +//swop data + + map->numFeatures = pp; + map->sizeX = sizeX; + map->sizeY = sizeY; + + free (map->map); + free (partOfNorm); + + map->map = newData; + + return LATENT_SVM_OK; +} +/* +// Feature map reduction +// In each cell we reduce dimension of the feature vector +// according to original paper special procedure +// +// API +// int PCAFeatureMaps(featureMap *map) +// INPUT +// map - feature map +// OUTPUT +// map - feature map +// RESULT +// Error status +*/ +int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map) +{ + int i,j, ii, jj, k; + int sizeX, sizeY, p, pp, xp, yp, pos1, pos2; + float * newData; + float val; + float nx, ny; + + sizeX = map->sizeX; + sizeY = map->sizeY; + p = map->numFeatures; + 
pp = NUM_SECTOR * 3 + 4; + yp = 4; + xp = NUM_SECTOR; + + nx = 1.0f / sqrtf((float)(xp * 2)); + ny = 1.0f / sqrtf((float)(yp )); + + newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp)); + + for(i = 0; i < sizeY; i++) + { + for(j = 0; j < sizeX; j++) + { + pos1 = ((i)*sizeX + j)*p; + pos2 = ((i)*sizeX + j)*pp; + k = 0; + for(jj = 0; jj < xp * 2; jj++) + { + val = 0; + for(ii = 0; ii < yp; ii++) + { + val += map->map[pos1 + yp * xp + ii * xp * 2 + jj]; + }/*for(ii = 0; ii < yp; ii++)*/ + newData[pos2 + k] = val * ny; + k++; + }/*for(jj = 0; jj < xp * 2; jj++)*/ + for(jj = 0; jj < xp; jj++) + { + val = 0; + for(ii = 0; ii < yp; ii++) + { + val += map->map[pos1 + ii * xp + jj]; + }/*for(ii = 0; ii < yp; ii++)*/ + newData[pos2 + k] = val * ny; + k++; + }/*for(jj = 0; jj < xp; jj++)*/ + for(ii = 0; ii < yp; ii++) + { + val = 0; + for(jj = 0; jj < 2 * xp; jj++) + { + val += map->map[pos1 + yp * xp + ii * xp * 2 + jj]; + }/*for(jj = 0; jj < xp; jj++)*/ + newData[pos2 + k] = val * nx; + k++; + } /*for(ii = 0; ii < yp; ii++)*/ + }/*for(j = 0; j < sizeX; j++)*/ + }/*for(i = 0; i < sizeY; i++)*/ +//swop data + + map->numFeatures = pp; + + free (map->map); + + map->map = newData; + + return LATENT_SVM_OK; +} + +int getPathOfFeaturePyramid(IplImage * image, + float step, int numStep, int startIndex, + int sideLength, CvLSVMFeaturePyramidCaskade **maps) +{ + CvLSVMFeatureMapCaskade *map; + IplImage *scaleTmp; + float scale; + int i; + + for(i = 0; i < numStep; i++) + { + scale = 1.0f / powf(step, (float)i); + scaleTmp = resize_opencv (image, scale); + getFeatureMaps(scaleTmp, sideLength, &map); + normalizeAndTruncate(map, VAL_OF_TRUNCATE); + PCAFeatureMaps(map); + (*maps)->pyramid[startIndex + i] = map; + cvReleaseImage(&scaleTmp); + }/*for(i = 0; i < numStep; i++)*/ + return LATENT_SVM_OK; +} + +#ifdef HAVE_TBB + +class PathOfFeaturePyramid : public ParallelLoopBody{ +public: + IplImage * image; + float step; + int startIndex; + int sideLength; + CvLSVMFeaturePyramidCaskade **maps; + + void operator() (const Range& range) const + { + CvLSVMFeatureMapCaskade *map; + IplImage *scaleTmp; + float scale; + int err; + + for( int i=range.start; i!=range.end; ++i ) + { + scale = 1.0f / powf(step, (float)i); + scaleTmp = resize_opencv (image, scale); + err = getFeatureMaps(scaleTmp, sideLength, &map); + err = normalizeAndTruncate(map, VAL_OF_TRUNCATE); + err = PCAFeatureMaps(map); + (*maps)->pyramid[startIndex + i] = map; + cvReleaseImage(&scaleTmp); + } + } +}; + + +int getPathOfFeaturePyramid_TBB(IplImage * image, + float step, int numStep, int startIndex, + int sideLength, CvLSVMFeaturePyramidCaskade **maps) +{ + PathOfFeaturePyramid str; + str.step = step; + str.startIndex = startIndex; + str.sideLength = sideLength; + str.maps = maps; + str.image = image; + + cv::parallel_for_(Range( 0, numStep ), str ); + + return LATENT_SVM_OK; +} +#endif + +/* +// Getting feature pyramid +// +// API +// int getFeaturePyramid(IplImage * image, const CvLSVMFilterObjectCaskade **all_F, + const int n_f, + const int lambda, const int k, + const int startX, const int startY, + const int W, const int H, featurePyramid **maps); +// INPUT +// image - image +// OUTPUT +// maps - feature maps for all levels +// RESULT +// Error status +*/ +int getFeaturePyramid(IplImage * image, CvLSVMFeaturePyramidCaskade **maps) +{ + IplImage *imgResize; + float step; + int numStep; + int maxNumCells; + int W, H; + + if(image->depth == IPL_DEPTH_32F) + { + imgResize = image; + } + else + { + imgResize = 
cvCreateImage(cvSize(image->width , image->height) , + IPL_DEPTH_32F , 3); + cvConvert(image, imgResize); + } + + W = imgResize->width; + H = imgResize->height; + + step = powf(2.0f, 1.0f / ((float)LAMBDA)); + maxNumCells = W / SIDE_LENGTH; + if( maxNumCells > H / SIDE_LENGTH ) + { + maxNumCells = H / SIDE_LENGTH; + } + numStep = (int)(logf((float) maxNumCells / (5.0f)) / logf( step )) + 1; + + allocFeaturePyramidObject(maps, numStep + LAMBDA); + +#ifdef HAVE_TBB + getPathOfFeaturePyramid_TBB(imgResize, step , LAMBDA, 0, + SIDE_LENGTH / 2, maps); + getPathOfFeaturePyramid_TBB(imgResize, step, numStep, LAMBDA, + SIDE_LENGTH , maps); +#else + getPathOfFeaturePyramid(imgResize, step , LAMBDA, 0, + SIDE_LENGTH / 2, maps); + getPathOfFeaturePyramid(imgResize, step, numStep, LAMBDA, + SIDE_LENGTH , maps); +#endif + + if(image->depth != IPL_DEPTH_32F) + { + cvReleaseImage(&imgResize); + } + + return LATENT_SVM_OK; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_function.cpp b/modules/latentsvm/src/lsvmc_function.cpp new file mode 100644 index 00000000000..a515db2865b --- /dev/null +++ b/modules/latentsvm/src/lsvmc_function.cpp @@ -0,0 +1,127 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
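`getFeaturePyramid()` above sizes the pyramid from the image dimensions: with `step = 2^(1/LAMBDA)` and `maxNumCells = min(W, H) / SIDE_LENGTH`, it allocates `numStep + LAMBDA` levels, the extra `LAMBDA` levels covering the doubled-resolution path. A standalone sketch of that arithmetic (the 640x480 image size is an example):

```cpp
// Standalone sketch of the level-count arithmetic in getFeaturePyramid().
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    const int   LAMBDA = 10, SIDE_LENGTH = 8;        // constants from _lsvmc_types.h
    const int   W = 640, H = 480;                    // example image size
    const float step = std::pow(2.0f, 1.0f / LAMBDA);

    int maxNumCells = std::min(W, H) / SIDE_LENGTH;
    int numStep = (int)(std::log((float)maxNumCells / 5.0f) / std::log(step)) + 1;

    std::printf("numStep = %d, total pyramid levels = %d\n", numStep, numStep + LAMBDA);
    return 0;
}
```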
+// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_function.h" +namespace cv +{ +namespace lsvm +{ + +float calcM (int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter){ + int i, j; + float m = 0.0f; + for(j = dj; j < dj + filter->sizeY; j++){ + for(i = di * H->pyramid[k]->numFeatures; i < (di + filter->sizeX) * H->pyramid[k]->numFeatures; i++){ + m += H->pyramid[k]->map[(j * H->pyramid[k]->sizeX ) * H->pyramid[k]->numFeatures + i] * + filter ->H [((j - dj) * filter->sizeX - di) * H->pyramid[k]->numFeatures + i]; + } + } + return m; +} +float calcM_PCA(int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter){ + int i, j; + float m = 0.0f; + for(j = dj; j < dj + filter->sizeY; j++){ + for(i = di * H->pyramid[k]->numFeatures; i < (di + filter->sizeX) * H->pyramid[k]->numFeatures; i++){ + m += H->pyramid[k]->map[(j * H->pyramid[k]->sizeX ) * H->pyramid[k]->numFeatures + i] * + filter ->H_PCA [((j - dj) * filter->sizeX - di) * H->pyramid[k]->numFeatures + i]; + } + } + + return m; +} +float calcM_PCA_cash(int k,int di,int dj, const CvLSVMFeaturePyramidCaskade * H, const CvLSVMFilterObjectCaskade *filter, float * cashM, int * maskM, int step){ + int i, j, n; + float m = 0.0f; + float tmp1, tmp2, tmp3, tmp4; + float res; + int pos; + float *a, *b; + + pos = dj * step + di; + + if(!((maskM[pos / (sizeof(int) * 8)]) & (1 << pos % (sizeof(int) * 8)))) + { + for(j = dj; j < dj + filter->sizeY; j++) + { + a = H->pyramid[k]->map + (j * H->pyramid[k]->sizeX) * H->pyramid[k]->numFeatures + + di * H->pyramid[k]->numFeatures; + b = filter ->H_PCA + (j - dj) * filter->sizeX * H->pyramid[k]->numFeatures; + n = ((di + filter->sizeX) * H->pyramid[k]->numFeatures) - + (di * H->pyramid[k]->numFeatures); + + res = 0.0f; + tmp1 = 0.0f; tmp2 = 0.0f; tmp3 = 0.0f; tmp4 = 0.0f; + + for (i = 0; i < (n >> 2); ++i) + { + tmp1 += a[4 * i + 0] * b[4 * i + 0]; + tmp2 += a[4 * i + 1] * b[4 * i + 1]; + tmp3 += a[4 * i + 2] * b[4 * i + 2]; + tmp4 += a[4 * i + 3] * b[4 * i + 3]; + } + + for (i = (n >> 2) << 2; i < n; ++i) //? + { + res += a[i] * b[i]; + } + + res += tmp1 + tmp2 + tmp3 + tmp4; + + m += res; + } + + cashM[pos ] = m; + maskM[pos / (sizeof(int) * 8)] |= 1 << pos % (sizeof(int) * 8); + } + else + { + m = cashM[pos]; + } + return m; +} +float calcFine (const CvLSVMFilterObjectCaskade *filter, int di, int dj){ + return filter->fineFunction[0] * di + filter->fineFunction[1] * dj + + filter->fineFunction[2] * di * di + filter->fineFunction[3] * dj * dj; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_latentsvm.cpp b/modules/latentsvm/src/lsvmc_latentsvm.cpp new file mode 100644 index 00000000000..e2d65945d62 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_latentsvm.cpp @@ -0,0 +1,1018 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. 
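`calcM_PCA_cash()` above memoises per-position scores with a bit mask: one bit per root-filter position, tested and set with `pos / (sizeof(int) * 8)` and `1 << pos % (sizeof(int) * 8)`. A standalone sketch of that caching idiom (grid dimensions and the cached value are examples):

```cpp
// Standalone sketch of the bit-mask memoisation used by calcM_PCA_cash(): each
// position gets one bit in maskM; the cached score in cashM is only reused
// once that bit has been set.
#include <cstdio>
#include <vector>

int main()
{
    const int step = 64, rows = 48;                  // example grid of positions
    const int nPos = step * rows;
    const int bits = 8 * (int)sizeof(int);
    std::vector<int>   maskM((nPos + bits - 1) / bits, 0);
    std::vector<float> cashM(nPos, 0.f);

    int di = 5, dj = 7;
    int pos = dj * step + di;

    if (!(maskM[pos / bits] & (1 << (pos % bits))))  // not cached yet
    {
        cashM[pos] = 0.42f;                          // stand-in for the dot-product score
        maskM[pos / bits] |= 1 << (pos % bits);
    }
    std::printf("score at (%d, %d) = %f\n", di, dj, cashM[pos]);
    return 0;
}
```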
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_latentsvm.h" +#include "_lsvmc_matching.h" +#include "_lsvmc_function.h" + +#ifdef HAVE_TBB +#include +#include "tbb/parallel_for.h" +#include "tbb/blocked_range.h" +#endif + +namespace cv +{ +namespace lsvm +{ + +int estimateBoxes(CvPoint *points, int *levels, int kPoints, + int sizeX, int sizeY, CvPoint **oppositePoints); + +int searchObjectThreshold(const CvLSVMFeaturePyramidCaskade *H, + const CvLSVMFeaturePyramidCaskade *H_PCA, + const CvLSVMFilterObjectCaskade **all_F, int n, + float b, + int maxXBorder, int maxYBorder, + float scoreThreshold, + CvPoint **points, int **levels, int *kPoints, + float **score, CvPoint ***partsDisplacement); + +void FeaturePyramid32(CvLSVMFeaturePyramidCaskade* H, int maxX, int maxY){ + CvLSVMFeatureMapCaskade *H32; + int i, j, k, l; + int p = H->pyramid[0]->numFeatures; + + for(i = 0; i < H->numLevels; i++){ + allocFeatureMapObject(&(H32), H->pyramid[i]->sizeX, H->pyramid[i]->sizeY, p + 1); + for(j = 0; j < (H->pyramid[i]->sizeX * H->pyramid[i]->sizeY); j++){ + for(k = 0; k < p; k++){ + H32->map[j * (p + 1) + k] = H->pyramid[i]->map[j * p + k]; + } + H32->map[j * (p + 1) + k] = 1.0f; + } + freeFeatureMapObject(&(H->pyramid[i])); + H->pyramid[i] = H32; + } + for(l = 0; l < H->numLevels; l++){ + for(j = maxY + 1; j < (H->pyramid[l]->sizeY - maxY - 1); j++){ + for(i = maxX + 1; i < (H->pyramid[l]->sizeX - maxX - 1); i++){ + H->pyramid[l]->map[ (j * H->pyramid[l]->sizeX + i) * (p+1) + p] = 0.0f; + } + } + } +} + +CvLSVMFeaturePyramidCaskade* createPCA_FeaturePyramid(CvLSVMFeaturePyramidCaskade* H, CvLatentSvmDetectorCaskade* detector, int maxX, int maxY){ + CvLSVMFeaturePyramidCaskade *H_PCA; + int i, j, k, l; + int max_l = detector->pca_size; + int p = H->pyramid[0]->numFeatures; + + allocFeaturePyramidObject(&H_PCA, H->numLevels); + + for(i = 0; i < H->numLevels; i++){ + allocFeatureMapObject(&(H_PCA->pyramid[i]), H->pyramid[i]->sizeX, H->pyramid[i]->sizeY, 6); + for(j = 0; j < (H->pyramid[i]->sizeX * H->pyramid[i]->sizeY); j++){ + for(k = 0; k < 
5; k++){ + for(l = 0; l < max_l; l++){ + H_PCA->pyramid[i]->map[j * 6 + k] += + detector->pca[k * max_l + l] * H->pyramid[i]->map[j * p + l]; + } + } + H_PCA->pyramid[i]->map[j * 6 + k] = 1.0f; + } + } + for(l = 0; l < H->numLevels; l++){ + for(j = maxY + 1; j < (H->pyramid[l]->sizeY - maxY - 1); j++){ + for(i = maxX + 1; i < (H->pyramid[l]->sizeX - maxX - 1); i++){ + H_PCA->pyramid[l]->map[ (j * H->pyramid[l]->sizeX + i) * 6 + 5] = 0.0f; + } + } + } + + return H_PCA; +} + +/* +// Transformation filter displacement from the block space +// to the space of pixels at the initial image +// +// API +// int convertPoints(int countLevel, CvPoint *points, int *levels, + CvPoint **partsDisplacement, int kPoints, int n); +// INPUT +// countLevel - the number of levels in the feature pyramid +// points - the set of root filter positions (in the block space) +// levels - the set of levels +// partsDisplacement - displacement of part filters (in the block space) +// kPoints - number of root filter positions +// n - number of part filters +// initialImageLevel - level that contains features for initial image +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// points - the set of root filter positions (in the space of pixels) +// partsDisplacement - displacement of part filters (in the space of pixels) +// RESULT +// Error status +*/ +int convertPoints(int /*countLevel*/, int lambda, + int initialImageLevel, + CvPoint *points, int *levels, + CvPoint **partsDisplacement, int kPoints, int n, + int maxXBorder, + int maxYBorder) +{ + int i, j; + float step, scale; + step = powf( 2.0f, 1.0f / ((float)lambda) ); + + //computeBorderSize(maxXBorder, maxYBorder, &bx, &by); + + for (i = 0; i < kPoints; i++) + { + // scaling factor for root filter + scale = SIDE_LENGTH * powf(step, (float)(levels[i] - initialImageLevel)); + points[i].x = (int)((points[i].x - maxXBorder) * scale); + points[i].y = (int)((points[i].y - maxYBorder) * scale); + + // scaling factor for part filters + scale = SIDE_LENGTH * powf(step, (float)(levels[i] - lambda - initialImageLevel)); + for (j = 0; j < n; j++) + { + partsDisplacement[i][j].x = (int)((partsDisplacement[i][j].x - + maxXBorder) * scale); + partsDisplacement[i][j].y = (int)((partsDisplacement[i][j].y - + maxYBorder) * scale); + } + } + return LATENT_SVM_OK; +} + +/* +// Elimination boxes that are outside the image boudaries +// +// API +// int clippingBoxes(int width, int height, + CvPoint *points, int kPoints); +// INPUT +// width - image wediht +// height - image heigth +// points - a set of points (coordinates of top left or + bottom right corners) +// kPoints - points number +// OUTPUT +// points - updated points (if coordinates less than zero then + set zero coordinate, if coordinates more than image + size then set coordinates equal image size) +// RESULT +// Error status +*/ +int clippingBoxes(int width, int height, + CvPoint *points, int kPoints) +{ + int i; + for (i = 0; i < kPoints; i++) + { + if (points[i].x > width - 1) + { + points[i].x = width - 1; + } + if (points[i].x < 0) + { + points[i].x = 0; + } + if (points[i].y > height - 1) + { + points[i].y = height - 1; + } + if (points[i].y < 0) + { + points[i].y = 0; + } + } + return LATENT_SVM_OK; +} + +/* +// Creation feature pyramid with nullable border +// +// API +// featurePyramid* createFeaturePyramidWithBorder(const IplImage *image, + int maxXBorder, int maxYBorder); + +// INPUT +// image - initial image +// maxXBorder - the 
largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// RESULT +// Feature pyramid with nullable border +*/ +CvLSVMFeaturePyramidCaskade* createFeaturePyramidWithBorder(IplImage *image, + int maxXBorder, int maxYBorder) +{ + int opResult; + int bx, by; + int level; + CvLSVMFeaturePyramidCaskade *H; + + // Obtaining feature pyramid + opResult = getFeaturePyramid(image, &H); + + if (opResult != LATENT_SVM_OK) + { + freeFeaturePyramidObject(&H); + return NULL; + } /* if (opResult != LATENT_SVM_OK) */ + + // Addition nullable border for each feature map + // the size of the border for root filters + bx = maxXBorder + 1; + by = maxYBorder + 1; + for (level = 0; level < H->numLevels; level++) + { + addNullableBorder(H->pyramid[level], bx, by); + } + return H; +} + +/* +// Computation right bottom corners coordinates of bounding boxes +// +// API +// int estimateBoxes(CvPoint *points, int *levels, int kPoints, + int sizeX, int sizeY, CvPoint **oppositePoints); +// INPUT +// points - left top corners coordinates of bounding boxes +// levels - levels of feature pyramid where points were found +// (sizeX, sizeY) - size of root filter +// OUTPUT +// oppositePoins - right bottom corners coordinates of bounding boxes +// RESULT +// Error status +*/ +int estimateBoxes(CvPoint *points, int *levels, int kPoints, + int sizeX, int sizeY, CvPoint **oppositePoints) +{ + int i; + float step; + + step = powf( 2.0f, 1.0f / ((float)(LAMBDA))); + + *oppositePoints = (CvPoint *)malloc(sizeof(CvPoint) * kPoints); + for (i = 0; i < kPoints; i++) + { + getOppositePoint(points[i], sizeX, sizeY, step, levels[i] - LAMBDA, &((*oppositePoints)[i])); + } + return LATENT_SVM_OK; +} + +/* +// Computation of the root filter displacement and values of score function +// +// API +// int searchObjectThreshold(const featurePyramid *H, + const CvLSVMFilterObjectCaskade **all_F, int n, + float b, + int maxXBorder, int maxYBorder, + float scoreThreshold, + CvPoint **points, int **levels, int *kPoints, + float **score, CvPoint ***partsDisplacement); +// INPUT +// H - feature pyramid +// all_F - the set of filters (the first element is root filter, + other elements - part filters) +// n - the number of part filters +// b - linear term of the score function +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// scoreThreshold - score threshold +// OUTPUT +// points - positions (x, y) of the upper-left corner + of root filter frame +// levels - levels that correspond to each position +// kPoints - number of positions +// score - values of the score function +// partsDisplacement - part filters displacement for each position + of the root filter +// RESULT +// Error status +*/ +int searchObjectThreshold(const CvLSVMFeaturePyramidCaskade *H, + const CvLSVMFeaturePyramidCaskade *H_PCA, + const CvLSVMFilterObjectCaskade **all_F, int n, + float b, + int maxXBorder, int maxYBorder, + float scoreThreshold, + CvPoint **points, int **levels, int *kPoints, + float **score, CvPoint ***partsDisplacement) +{ + int opResult = LATENT_SVM_OK; + + int i, j, k, path; + int di, dj, ii; + + //int *map,jj, nomer; + //FILE *dump; + + float p; + float fine; + float mpath; + + CvPoint *tmpPoints; + int *tmpLevels; + float **tmpAScore; + + int flag,flag2; + + CvPoint *PCAPoints; + int *PCALevels; + float **PCAAScore; + int PCAkPoints; + float PCAScore; + int tmpSize = 10; + int tmpStep = 10; + + float *rootScoreForLevel; + int maxX, 
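+    // Informal summary of the cascade implemented below. Stage one scores
+    // every root location of each level k >= LAMBDA with the cheap 6-channel
+    // PCA pyramid (calcM_PCA), then adds part filters one at a time, dropping
+    // the candidate as soon as the running score is not above the per-stage
+    // Deformation_PCA / Hypothesis_PCA thresholds stored in the filters.
+    // Stage two re-scores the surviving (level, x, y) candidates with the
+    // full (p + 1)-channel maps (calcM), replacing each PCA contribution by
+    // the exact one under the Deformation / Hypothesis thresholds, and keeps
+    // only detections whose final score exceeds scoreThreshold.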
maxY, maxPathX, maxPathY, step; + int pathX, pathY; + int ai; + + + float **cashM; + int **maskM; + int sizeM; + + sizeM = H_PCA->pyramid[0]->sizeX - maxXBorder + 1; + sizeM *= H_PCA->pyramid[0]->sizeY - maxYBorder + 1; + + cashM = (float**)malloc(sizeof(float *) * n); + maskM = (int **)malloc(sizeof(int *) * n); + for(ai = 0; ai < n; ai++){ + cashM[ai] = (float*)malloc(sizeof(float) * sizeM); + maskM[ai] = (int *)malloc(sizeof(int) * (sizeM/(sizeof(int) * 8) + 1)); + } + + + PCAPoints = (CvPoint*)malloc(sizeof(CvPoint) * tmpSize); + PCALevels = (int*)malloc(sizeof(int) * tmpSize); + PCAAScore = (float **)malloc(sizeof(float *) * tmpSize); + for(ai = 0; ai < tmpSize; ai++){ + PCAAScore[ai] = (float *)malloc(sizeof(float) * (n + 2)); + } + + PCAkPoints = 0; + for(k = LAMBDA; k < H_PCA->numLevels; k++){ + maxX = H_PCA->pyramid[k]->sizeX - maxXBorder + 1; + maxY = H_PCA->pyramid[k]->sizeY - maxYBorder + 1; + maxPathX = H_PCA->pyramid[k - LAMBDA]->sizeX - maxXBorder + 1; + maxPathY = H_PCA->pyramid[k - LAMBDA]->sizeY - maxYBorder + 1; + rootScoreForLevel = (float *) malloc(sizeof(float) + * (maxX - (int)ceil(maxXBorder/2.0)) + * (maxY - (int)ceil(maxYBorder/2.0))); + + step = maxX - (int)ceil(maxXBorder/2.0); + //dump = fopen("map_10.csv", "w"); + for(j = (int)ceil(maxYBorder/2.0) ; j < maxY; j++){ + for(i = (int)ceil(maxXBorder/2.0) ; i < maxX; i++){ + rootScoreForLevel[(j - (int)ceil(maxYBorder/2.0)) * step + i - (int)ceil(maxXBorder/2.0)] + = calcM_PCA(k, i, j, H_PCA, all_F[0]); + // fprintf(dump, "%f;", rootScoreForLevel[j * maxX + i]); + } + // fprintf(dump, "\n"); + } + // fclose(dump); + + sizeM = maxPathX * maxPathY; + for(path = 0 ; path < n; path++){ + memset(maskM[path], 0, sizeof(int) * (sizeM/(sizeof(int) * 8) + 1)); + } + + for(j = (int)ceil(maxYBorder/2.0) ; j < maxY; j++){ + for(i = (int)ceil(maxXBorder/2.0) ; i < maxX; i++){ + // PCAScore = calcM_PCA(k, i, j, H_PCA, all_F[0]); + PCAScore = + rootScoreForLevel[(j - (int)ceil(maxYBorder/2.0)) * step + i - (int)ceil(maxXBorder/2.0)]; + PCAScore += b; + PCAAScore[PCAkPoints][0] = PCAScore - b; + + flag2=0; + for(path = 1 ; (path <= n) && (!flag2); path++){ + if(PCAScore > all_F[path - 1]->Deformation_PCA) + { + p = F_MIN ; + //pathX = (i - maxXBorder - 1) * 2 + maxXBorder + 1 + all_F[path]->V.x; + //pathY = (j - maxYBorder - 1) * 2 + maxYBorder + 1 + all_F[path]->V.y; + pathX = i * 2 - maxXBorder + all_F[path]->V.x; + pathY = j * 2 - maxYBorder + all_F[path]->V.y; + flag = 1; + for(dj = max(0, pathY - all_F[path]->deltaY); + dj < min(maxPathY, pathY + all_F[path]->deltaY); + dj++){ + for(di = max(0, pathX - all_F[path]->deltaX); + di < min(maxPathX, pathX + all_F[path]->deltaX); + di++){ + //fine = calcFine(all_F[path], abs(pathX - di), abs(pathY - dj)); + fine = calcFine(all_F[path], pathX - di, pathY - dj); + if((PCAScore - fine) > all_F[path - 1]->Hypothesis_PCA) + { + flag = 0; + mpath = calcM_PCA_cash(k - LAMBDA, di, dj, H_PCA, all_F[path], cashM[path - 1], maskM[path - 1], maxPathX) - fine; + if( mpath > p){ + p = mpath; + } + } + } + } + if(flag==0){ + PCAAScore[PCAkPoints][path] = p;// + pfine; + PCAScore += p;// + pfine; + } else flag2 = 1; + } + else flag2 = 1; + } + if((PCAScore > all_F[n]->Hypothesis_PCA)&&(flag2==0)){ + PCALevels[PCAkPoints] = k; + PCAPoints[PCAkPoints].x = i; + PCAPoints[PCAkPoints].y = j; + PCAAScore[PCAkPoints][n + 1] = PCAScore; + PCAkPoints ++; + if(PCAkPoints >= tmpSize){ + tmpPoints = (CvPoint*)malloc(sizeof(CvPoint) * (tmpSize + tmpStep)); + tmpLevels = (int*)malloc(sizeof(int) * (tmpSize + 
tmpStep)); + tmpAScore = (float **)malloc(sizeof(float *) * (tmpSize + tmpStep)); + for(ai = tmpSize; ai < tmpSize + tmpStep; ai++){ + tmpAScore[ai] = (float *)malloc(sizeof(float) * (n + 2)); + } + for(ii = 0; ii < PCAkPoints; ii++){ + tmpLevels[ii] = PCALevels[ii] ; + tmpPoints[ii].x = PCAPoints[ii].x; + tmpPoints[ii].y = PCAPoints[ii].y; + tmpAScore[ii] = PCAAScore[ii] ; + } + free(PCALevels); + free(PCAPoints); + free(PCAAScore); + PCALevels = tmpLevels; + PCAPoints = tmpPoints; + PCAAScore = tmpAScore; + tmpSize += tmpStep; + } + } + } + } + free (rootScoreForLevel); + } + + (*points) = (CvPoint *)malloc(sizeof(CvPoint) * PCAkPoints); + (*levels) = (int *)malloc(sizeof(int ) * PCAkPoints); + (*score ) = (float *)malloc(sizeof(float ) * PCAkPoints); + (*partsDisplacement) = (CvPoint **)malloc(sizeof(CvPoint *) * (PCAkPoints + 1)); + + (*kPoints) = 0; + if(PCAkPoints > 0) + (*partsDisplacement)[(*kPoints)] = (CvPoint *)malloc(sizeof(CvPoint) * (n + 1)); + for(ii = 0; ii < PCAkPoints; ii++) + { + k = PCALevels[ii] ; + i = PCAPoints[ii].x; + j = PCAPoints[ii].y; + + maxPathX = H_PCA->pyramid[k - LAMBDA]->sizeX - maxXBorder + 1; + maxPathY = H_PCA->pyramid[k - LAMBDA]->sizeY - maxYBorder + 1; + + (*score )[(*kPoints)] = PCAAScore[ii][n + 1] + calcM(k, i, j, H, all_F[0]) - PCAAScore[ii][0]; + (*partsDisplacement)[(*kPoints)][0].x = i; + (*partsDisplacement)[(*kPoints)][0].y = j; + for(path = 1 ; path <= n; path++){ + if((*score )[(*kPoints)] < all_F[path - 1]->Deformation) break; + // { + p = F_MIN ; + flag = 1; + //pathX = (i - maxXBorder - 1) * 2 + maxXBorder + 1 + all_F[path]->V.x; + //pathY = (j - maxYBorder - 1) * 2 + maxYBorder + 1 + all_F[path]->V.y; + pathX = i * 2 - maxXBorder + all_F[path]->V.x; + pathY = j * 2 - maxYBorder + all_F[path]->V.y; + for(dj = max(0, pathY - all_F[path]->deltaY); + dj < min(maxPathY, pathY + all_F[path]->deltaY); + dj++){ + for(di = max(0, pathX - all_F[path]->deltaX); + di < min(maxPathX, pathX + all_F[path]->deltaX); + di++){ + //fine = calcFine(all_F[path], abs(pathX - di), abs(pathY - dj)); + fine = calcFine(all_F[path], pathX - di, pathY - dj); + if(((*score )[(*kPoints)] - fine) > all_F[path - 1]->Hypothesis) + { + flag = 0; + mpath = calcM(k - LAMBDA, di, dj, H, all_F[path]) - fine; + if(mpath > p){ + p = mpath; + (*partsDisplacement)[(*kPoints)][path].x = di; + (*partsDisplacement)[(*kPoints)][path].y = dj; + } + } + } + } + if(flag == 0) + (*score )[(*kPoints)] += p - PCAAScore[ii][path];// + pfine; + // } + } + if((*score )[(*kPoints)] > scoreThreshold) + { + (*levels)[(*kPoints)] = k; + (*points)[(*kPoints)].x = i; + (*points)[(*kPoints)].y = j; + (*kPoints) ++; + (*partsDisplacement)[(*kPoints)] = (CvPoint*) malloc(sizeof(CvPoint) * (n + 1)); + } + } + if((*kPoints) > 0){ + free((*partsDisplacement)[(*kPoints)]); + } + // Matching end + + free(PCAPoints); + free(PCALevels); + for(ai = 0; ai < tmpSize; ai++){ + free(PCAAScore[ai]); + } + free(PCAAScore); + + for(ai = 0; ai < n; ai++){ + free(cashM[ai]); + free(maskM[ai]); + } + free(cashM); + free(maskM); + + if (opResult != (LATENT_SVM_OK)) + { + return LATENT_SVM_SEARCH_OBJECT_FAILED; + } + + // Transformation filter displacement from the block space + // to the space of pixels at the initial image + // that settles at the level number LAMBDA + convertPoints(H->numLevels, LAMBDA, LAMBDA, (*points), + (*levels), (*partsDisplacement), (*kPoints), n, + maxXBorder, maxYBorder); + + return LATENT_SVM_OK; +} + +/* +// Compute opposite point for filter box +// +// API +// int 
getOppositePoint(CvPoint point, + int sizeX, int sizeY, + float step, int degree, + CvPoint *oppositePoint); + +// INPUT +// point - coordinates of filter top left corner + (in the space of pixels) +// (sizeX, sizeY) - filter dimension in the block space +// step - scaling factor +// degree - degree of the scaling factor +// OUTPUT +// oppositePoint - coordinates of filter bottom corner + (in the space of pixels) +// RESULT +// Error status +*/ +int getOppositePoint(CvPoint point, + int sizeX, int sizeY, + float step, int degree, + CvPoint *oppositePoint) +{ + float scale; + scale = SIDE_LENGTH * powf(step, (float)degree); + oppositePoint->x = (int)(point.x + sizeX * scale); + oppositePoint->y = (int)(point.y + sizeY * scale); + return LATENT_SVM_OK; +} + + +/* +// Drawing root filter boxes +// +// API +// int showRootFilterBoxes(const IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); +// INPUT +// image - initial image +// filter - root filter object +// points - a set of points +// levels - levels of feature pyramid +// kPoints - number of points +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// window contained initial image and filter boxes +// RESULT +// Error status +*/ +int showRootFilterBoxes(IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift) +{ + int i; + float step; + CvPoint oppositePoint; + step = powf( 2.0f, 1.0f / ((float)LAMBDA)); + + for (i = 0; i < kPoints; i++) + { + // Drawing rectangle for filter + getOppositePoint(points[i], filter->sizeX, filter->sizeY, + step, levels[i] - LAMBDA, &oppositePoint); + cvRectangle(image, points[i], oppositePoint, + color, thickness, line_type, shift); + } +#ifdef HAVE_OPENCV_HIGHGUI + cvShowImage("Initial image", image); +#endif + return LATENT_SVM_OK; +} + +/* +// Drawing part filter boxes +// +// API +// int showPartFilterBoxes(const IplImage *image, + const CvLSVMFilterObjectCaskade *filter, + CvPoint *points, int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift); +// INPUT +// image - initial image +// filters - a set of part filters +// n - number of part filters +// partsDisplacement - a set of points +// levels - levels of feature pyramid +// kPoints - number of foot filter positions +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// window contained initial image and filter boxes +// RESULT +// Error status +*/ +int showPartFilterBoxes(IplImage *image, + const CvLSVMFilterObjectCaskade **filters, + int n, CvPoint **partsDisplacement, + int *levels, int kPoints, + CvScalar color, int thickness, + int line_type, int shift) +{ + int i, j; + float step; + CvPoint oppositePoint; + + step = powf( 2.0f, 1.0f / ((float)LAMBDA)); + + for (i = 0; i < kPoints; i++) + { + for (j = 0; j < n; j++) + { + // Drawing rectangles for part filters + getOppositePoint(partsDisplacement[i][j], + filters[j + 1]->sizeX, filters[j + 1]->sizeY, + step, levels[i] - 2 * LAMBDA, &oppositePoint); + cvRectangle(image, partsDisplacement[i][j], oppositePoint, + color, thickness, line_type, shift); + } + } +#ifdef HAVE_OPENCV_HIGHGUI + cvShowImage("Initial image", image); +#endif + return LATENT_SVM_OK; +} + +/* +// Drawing boxes +// +// API +// int 
showBoxes(const IplImage *img, + const CvPoint *points, const CvPoint *oppositePoints, int kPoints, + CvScalar color, int thickness, int line_type, int shift); +// INPUT +// img - initial image +// points - top left corner coordinates +// oppositePoints - right bottom corner coordinates +// kPoints - points number +// color - line color for each box +// thickness - line thickness +// line_type - line type +// shift - shift +// OUTPUT +// RESULT +// Error status +*/ +int showBoxes(IplImage *img, + const CvPoint *points, const CvPoint *oppositePoints, int kPoints, + CvScalar color, int thickness, int line_type, int shift) +{ + int i; + for (i = 0; i < kPoints; i++) + { + cvRectangle(img, points[i], oppositePoints[i], + color, thickness, line_type, shift); + } +#ifdef HAVE_OPENCV_HIGHGUI + cvShowImage("Initial image", img); +#endif + return LATENT_SVM_OK; +} + +///* +//// Computation maximum filter size for each dimension +//// +//// API +//// int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, +// const int *kPartFilters, +// unsigned int *maxXBorder, unsigned int *maxYBorder); +//// INPUT +//// filters - a set of filters (at first root filter, then part filters +// and etc. for all components) +//// kComponents - number of components +//// kPartFilters - number of part filters for each component +//// OUTPUT +//// maxXBorder - maximum of filter size at the horizontal dimension +//// maxYBorder - maximum of filter size at the vertical dimension +//// RESULT +//// Error status +//*/ +//int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, +// const int *kPartFilters, +// unsigned int *maxXBorder, unsigned int *maxYBorder) +//{ +// int i, componentIndex; +// *maxXBorder = filters[0]->sizeX; +// *maxYBorder = filters[0]->sizeY; +// componentIndex = kPartFilters[0] + 1; +// for (i = 1; i < kComponents; i++) +// { +// if ((unsigned)filters[componentIndex]->sizeX > *maxXBorder) +// { +// *maxXBorder = filters[componentIndex]->sizeX; +// } +// if ((unsigned)filters[componentIndex]->sizeY > *maxYBorder) +// { +// *maxYBorder = filters[componentIndex]->sizeY; +// } +// componentIndex += (kPartFilters[i] + 1); +// } +// return LATENT_SVM_OK; +//} + + +#ifdef HAVE_TBB + +class PathOfModel :public ParallelLoopBody{ + int *componentIndex; + const CvLSVMFeaturePyramidCaskade *H; + const CvLSVMFeaturePyramidCaskade *H_PCA; + const CvLSVMFilterObjectCaskade **filters; + const int *kPartFilters; + const float *b; + unsigned int maxXBorder, maxYBorder; + CvPoint **pointsArr, **oppPointsArr, ***partsDisplacementArr; + float **scoreArr; + int *kPointsArr, **levelsArr; + float scoreThreshold; + CvPoint **oppPoints; +public: + PathOfModel( + int *_componentIndex, + const CvLSVMFeaturePyramidCaskade *_H, + const CvLSVMFeaturePyramidCaskade *_H_PCA, + const CvLSVMFilterObjectCaskade **_filters, + const int *_kPartFilters, + const float *_b, + unsigned int _maxXBorder, unsigned int _maxYBorder, + CvPoint **_pointsArr, CvPoint **_oppPointsArr, CvPoint ***_partsDisplacementArr, + float **_scoreArr, + int *_kPointsArr, int **_levelsArr, + float _scoreThreshold, + CvPoint **_oppPoints + ): + componentIndex(_componentIndex), + H(_H), + H_PCA(_H_PCA), + filters(_filters), + kPartFilters(_kPartFilters), + b(_b), + maxXBorder(_maxXBorder), + maxYBorder(_maxYBorder), + pointsArr(_pointsArr), + oppPointsArr(_oppPointsArr), + partsDisplacementArr(_partsDisplacementArr), + scoreArr(_scoreArr), + kPointsArr(_kPointsArr), + levelsArr(_levelsArr), + 
scoreThreshold(_scoreThreshold), + oppPoints(_oppPoints) + {} + + + void operator() (const Range& range) const + { + + for( int i=range.start; i!=range.end; ++i ) + { + searchObjectThreshold(H, H_PCA, &(filters[componentIndex[i]]), kPartFilters[i], + b[i], maxXBorder, maxYBorder, scoreThreshold, + &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), + &(scoreArr[i]), &(partsDisplacementArr[i])); + estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], + filters[componentIndex[i]]->sizeX, filters[componentIndex[i]]->sizeY, &(oppPointsArr[i])); + } + } +}; + +#endif +/* +// Computation root filters displacement and values of score function +// +// API +// int searchObjectThresholdSomeComponents(const featurePyramid *H, + const CvLSVMFilterObjectCaskade **filters, + int kComponents, const int *kPartFilters, + const float *b, float scoreThreshold, + CvPoint **points, CvPoint **oppPoints, + float **score, int *kPoints); +// INPUT +// H - feature pyramid +// filters - filters (root filter then it's part filters, etc.) +// kComponents - root filters number +// kPartFilters - array of part filters number for each component +// b - array of linear terms +// scoreThreshold - score threshold +// OUTPUT +// points - root filters displacement (top left corners) +// oppPoints - root filters displacement (bottom right corners) +// score - array of score values +// kPoints - number of boxes +// RESULT +// Error status +*/ +int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramidCaskade *H, + const CvLSVMFeaturePyramidCaskade *H_PCA, + const CvLSVMFilterObjectCaskade **filters, + int kComponents, const int *kPartFilters, + const float *b, float scoreThreshold, + CvPoint **points, CvPoint **oppPoints, + float **score, int *kPoints) +{ + int i, j, s, f, *componentIndex; + unsigned int maxXBorder, maxYBorder; + CvPoint **pointsArr, **oppPointsArr, ***partsDisplacementArr; + float **scoreArr; + int *kPointsArr, **levelsArr; + int sum; + + // Allocation memory + pointsArr = (CvPoint **)malloc(sizeof(CvPoint *) * kComponents); + oppPointsArr = (CvPoint **)malloc(sizeof(CvPoint *) * kComponents); + scoreArr = (float **)malloc(sizeof(float *) * kComponents); + kPointsArr = (int *)malloc(sizeof(int) * kComponents); + levelsArr = (int **)malloc(sizeof(int *) * kComponents); + partsDisplacementArr = (CvPoint ***)malloc(sizeof(CvPoint **) * kComponents); + componentIndex = (int *)malloc(sizeof(int) * kComponents); + + // Getting maximum filter dimensions + getMaxFilterDims(filters, kComponents, kPartFilters, &maxXBorder, &maxYBorder); + *kPoints = 0; + sum = 0; + componentIndex[0] = 0; + for (i = 1; i < kComponents; i++) + { + componentIndex[i] = componentIndex[i - 1] + (kPartFilters[i - 1] + 1); + } + // For each component perform searching +//#pragma omp parallel for schedule(dynamic) reduction(+ : sum) +#ifdef HAVE_TBB + PathOfModel POM( + componentIndex, + H, + H_PCA, + filters, + kPartFilters, + b, + maxXBorder, + maxYBorder, + pointsArr, + oppPointsArr, + partsDisplacementArr, + scoreArr, + kPointsArr, + levelsArr, + scoreThreshold, + oppPoints); + cv::parallel_for_( Range( 0, kComponents ), POM); +#else + for (i = 0; i < kComponents; i++) + { + searchObjectThreshold(H, H_PCA, &(filters[componentIndex[i]]), kPartFilters[i], + b[i], maxXBorder, maxYBorder, scoreThreshold, + &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), + &(scoreArr[i]), &(partsDisplacementArr[i])); + estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], + filters[componentIndex[i]]->sizeX, filters[componentIndex[i]]->sizeY, 
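+        // Informal note: this serial branch does the same per-component work
+        // as PathOfModel::operator() in the HAVE_TBB branch above (a
+        // searchObjectThreshold() call followed by estimateBoxes() for the
+        // bottom-right corners); in both cases the per-component arrays are
+        // flattened into the points / oppPoints / score outputs right after
+        // the #endif.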
&(oppPointsArr[i])); + } +#endif + for (i = 0; i < kComponents; i++) + { + //*kPoints += kPointsArr[i]; + sum += kPointsArr[i]; + } + *kPoints = sum; + *points = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints)); + *oppPoints = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints)); + *score = (float *)malloc(sizeof(float) * (*kPoints)); + + //file = fopen("point.txt", "w"); + s = 0; + for (i = 0; i < kComponents; i++) + { + f = s + kPointsArr[i]; + for (j = s; j < f; j++) + { + (*points)[j].x = pointsArr[i][j - s].x; + (*points)[j].y = pointsArr[i][j - s].y; + (*oppPoints)[j].x = oppPointsArr[i][j - s].x; + (*oppPoints)[j].y = oppPointsArr[i][j - s].y; + (*score)[j] = scoreArr[i][j - s]; + // fprintf(file, "%d %d %d %d %f\n", (*points)[j].x, (*points)[j].y, + // (*oppPoints)[j].x, (*oppPoints)[j].y, (*score)[j]); + } + s = f; + } + //fclose(file); + + // Release allocated memory + for (i = 0; i < kComponents; i++) + { + free(pointsArr[i]); + free(oppPointsArr[i]); + free(scoreArr[i]); + free(levelsArr[i]); + for (j = 0; j < kPointsArr[i]; j++) + { + free(partsDisplacementArr[i][j]); + } + free(partsDisplacementArr[i]); + } + free(pointsArr); + free(oppPointsArr); + free(scoreArr); + free(kPointsArr); + free(levelsArr); + free(partsDisplacementArr); + free(componentIndex); + return LATENT_SVM_OK; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_latentsvmdetector.cpp b/modules/latentsvm/src/lsvmc_latentsvmdetector.cpp new file mode 100644 index 00000000000..e24495fb830 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_latentsvmdetector.cpp @@ -0,0 +1,331 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_parser.h" +#include "_lsvmc_matching.h" +namespace cv +{ +namespace lsvm +{ + +std::string extractModelName( const std::string& filename ); + +const int pca_size = 31; + +CvLatentSvmDetectorCaskade* cvLoadLatentSvmDetectorCaskade(const char* filename); +void cvReleaseLatentSvmDetectorCaskade(CvLatentSvmDetectorCaskade** detector); +CvSeq* cvLatentSvmDetectObjectsCaskade(IplImage* image, + CvLatentSvmDetectorCaskade* detector, + CvMemStorage* storage, + float overlap_threshold); + +/* +// load trained detector from a file +// +// API +// CvLatentSvmDetectorCaskade* cvLoadLatentSvmDetector(const char* filename); +// INPUT +// filename - path to the file containing the parameters of +// - trained Latent SVM detector +// OUTPUT +// trained Latent SVM detector in internal representation +*/ +CvLatentSvmDetectorCaskade* cvLoadLatentSvmDetectorCaskade(const char* filename) +{ + CvLatentSvmDetectorCaskade* detector = 0; + CvLSVMFilterObjectCaskade** filters = 0; + int kFilters = 0; + int kComponents = 0; + int* kPartFilters = 0; + float* b = 0; + float scoreThreshold = 0.f; + int err_code = 0; + float* PCAcoeff = 0; + + err_code = loadModel(filename, &filters, &kFilters, &kComponents, &kPartFilters, &b, &scoreThreshold, &PCAcoeff); + if (err_code != LATENT_SVM_OK) return 0; + + detector = (CvLatentSvmDetectorCaskade*)malloc(sizeof(CvLatentSvmDetectorCaskade)); + detector->filters = filters; + detector->b = b; + detector->num_components = kComponents; + detector->num_filters = kFilters; + detector->num_part_filters = kPartFilters; + detector->score_threshold = scoreThreshold; + detector->pca = PCAcoeff; + detector->pca_size = pca_size; + + return detector; +} + +/* +// release memory allocated for CvLatentSvmDetectorCaskade structure +// +// API +// void cvReleaseLatentSvmDetector(CvLatentSvmDetectorCaskade** detector); +// INPUT +// detector - CvLatentSvmDetectorCaskade structure to be released +// OUTPUT +*/ +void cvReleaseLatentSvmDetectorCaskade(CvLatentSvmDetectorCaskade** detector) +{ + free((*detector)->b); + free((*detector)->num_part_filters); + for (int i = 0; i < (*detector)->num_filters; i++) + { + free((*detector)->filters[i]->H); + free((*detector)->filters[i]); + } + free((*detector)->filters); + free((*detector)->pca); + free((*detector)); + *detector = 0; +} + +/* +// find rectangular regions in the given image that are likely +// to contain objects and corresponding confidence levels +// +// API +// CvSeq* cvLatentSvmDetectObjects(const IplImage* image, +// CvLatentSvmDetectorCaskade* detector, +// CvMemStorage* storage, +// float overlap_threshold = 0.5f); +// INPUT +// image - image to detect objects in +// detector - Latent SVM detector in internal representation +// storage - memory storage to store the resultant sequence +// of the object candidate rectangles +// overlap_threshold - threshold for the non-maximum suppression algorithm [here will be the reference to original paper] +// 
OUTPUT +// sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures) +*/ +CvSeq* cvLatentSvmDetectObjectsCaskade(IplImage* image, + CvLatentSvmDetectorCaskade* detector, + CvMemStorage* storage, + float overlap_threshold) +{ + CvLSVMFeaturePyramidCaskade *H = 0; + CvLSVMFeaturePyramidCaskade *H_PCA = 0; + CvPoint *points = 0, *oppPoints = 0; + int kPoints = 0; + float *score = 0; + unsigned int maxXBorder = 0, maxYBorder = 0; + int numBoxesOut = 0; + CvPoint *pointsOut = 0; + CvPoint *oppPointsOut = 0; + float *scoreOut = 0; + CvSeq* result_seq = 0; + int error = 0; + + if(image->nChannels == 3) + cvCvtColor(image, image, CV_BGR2RGB); + + // Getting maximum filter dimensions + getMaxFilterDims((const CvLSVMFilterObjectCaskade**)(detector->filters), detector->num_components, + detector->num_part_filters, &maxXBorder, &maxYBorder); + // Create feature pyramid with nullable border + H = createFeaturePyramidWithBorder(image, maxXBorder, maxYBorder); + + // Create PSA feature pyramid + H_PCA = createPCA_FeaturePyramid(H, detector, maxXBorder, maxYBorder); + + FeaturePyramid32(H, maxXBorder, maxYBorder); + + // Search object + error = searchObjectThresholdSomeComponents(H, H_PCA,(const CvLSVMFilterObjectCaskade**)(detector->filters), + detector->num_components, detector->num_part_filters, detector->b, detector->score_threshold, + &points, &oppPoints, &score, &kPoints); + if (error != LATENT_SVM_OK) + { + return NULL; + } + // Clipping boxes + clippingBoxes(image->width, image->height, points, kPoints); + clippingBoxes(image->width, image->height, oppPoints, kPoints); + // NMS procedure + nonMaximumSuppression(kPoints, points, oppPoints, score, overlap_threshold, + &numBoxesOut, &pointsOut, &oppPointsOut, &scoreOut); + + result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvObjectDetection), storage ); + + for (int i = 0; i < numBoxesOut; i++) + { + CvObjectDetection detection; + detection.score = scoreOut[i]; + detection.rect.x = pointsOut[i].x; + detection.rect.y = pointsOut[i].y; + detection.rect.width = oppPointsOut[i].x - pointsOut[i].x; + detection.rect.height = oppPointsOut[i].y - pointsOut[i].y; + cvSeqPush(result_seq, &detection); + } + + if(image->nChannels == 3) + cvCvtColor(image, image, CV_RGB2BGR); + + freeFeaturePyramidObject(&H); + freeFeaturePyramidObject(&H_PCA); + free(points); + free(oppPoints); + free(score); + + return result_seq; +} + +class LSVMDetectorImpl : public LSVMDetector +{ +public: + + LSVMDetectorImpl( const std::vector& filenames, const std::vector& classNames=std::vector() ); + ~LSVMDetectorImpl(); + + bool isEmpty() const; + + void detect(cv::Mat const &image, CV_OUT std::vector& objects, float overlapThreshold=0.5f); + + const std::vector& getClassNames() const; + size_t getClassCount() const; + +private: + std::vector detectors; + std::vector classNames; +}; + +cv::Ptr LSVMDetector::create(std::vector const &filenames, + std::vector const &classNames) +{ + return cv::makePtr(filenames, classNames); +} + +LSVMDetectorImpl::ObjectDetection::ObjectDetection() : score(0.f), classID(-1) {} + +LSVMDetectorImpl::ObjectDetection::ObjectDetection( const Rect& _rect, float _score, int _classID ) : + rect(_rect), score(_score), classID(_classID) {} + + +LSVMDetectorImpl::LSVMDetectorImpl( const std::vector& filenames, const std::vector& _classNames ) +{ + for( size_t i = 0; i < filenames.size(); i++ ) + { + const std::string filename = filenames[i]; + if( filename.length() < 5 || filename.substr(filename.length()-4, 4) != 
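+        // Typical use of the public wrapper declared above, for illustration
+        // only ("person.xml" and "street.png" are made-up file names, and the
+        // ObjectDetection struct is assumed to come from the module's public
+        // header):
+        //
+        //   std::vector<std::string> files;
+        //   files.push_back("person.xml");                 // cascade model
+        //   cv::Ptr<cv::lsvm::LSVMDetector> det =
+        //       cv::lsvm::LSVMDetector::create(files, std::vector<std::string>());
+        //   cv::Mat frame = cv::imread("street.png");
+        //   std::vector<cv::lsvm::LSVMDetector::ObjectDetection> found;
+        //   if (!det->isEmpty())
+        //       det->detect(frame, found, 0.5f);           // 0.5f = NMS overlap
+        //   for (size_t i = 0; i < found.size(); ++i)
+        //       cv::rectangle(frame, found[i].rect, cv::Scalar(0, 255, 0), 2);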
".xml" ) + continue; + + CvLatentSvmDetectorCaskade* detector = cvLoadLatentSvmDetectorCaskade( filename.c_str() ); + if( detector ) + { + detectors.push_back( detector ); + if( _classNames.empty() ) + { + classNames.push_back( extractModelName(filenames[i]) ); + } + else + classNames.push_back( _classNames[i] ); + } + } +} + +LSVMDetectorImpl::~LSVMDetectorImpl() +{ + for(size_t i = 0; i < detectors.size(); i++) + cv::lsvm::cvReleaseLatentSvmDetectorCaskade(&detectors[i]); +} + +bool LSVMDetectorImpl::isEmpty() const +{ + return detectors.empty(); +} + +const std::vector& LSVMDetectorImpl::getClassNames() const +{ + return classNames; +} + +size_t LSVMDetectorImpl::getClassCount() const +{ + return classNames.size(); +} + +std::string extractModelName( const std::string& filename ) +{ + size_t startPos = filename.rfind('/'); + if( startPos == std::string::npos ) + startPos = filename.rfind('\\'); + + if( startPos == std::string::npos ) + startPos = 0; + else + startPos++; + + const int extentionSize = 4; //.xml + + int substrLength = (int)(filename.size() - startPos - extentionSize); + + return filename.substr(startPos, substrLength); +} + +void LSVMDetectorImpl::detect( cv::Mat const &image, + std::vector &objectDetections, + float overlapThreshold) +{ + objectDetections.clear(); + + for( size_t classID = 0; classID < detectors.size(); classID++ ) + { + IplImage image_ipl = image; + CvMemStorage* storage = cvCreateMemStorage(0); + CvSeq* detections = cv::lsvm::cvLatentSvmDetectObjectsCaskade( &image_ipl, (CvLatentSvmDetectorCaskade*)(detectors[classID]), storage, overlapThreshold); + + // convert results + objectDetections.reserve( objectDetections.size() + detections->total ); + for( int detectionIdx = 0; detectionIdx < detections->total; detectionIdx++ ) + { + CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, detectionIdx ); + objectDetections.push_back( ObjectDetection(Rect(detection.rect), detection.score, (int)classID) ); + } + + cvReleaseMemStorage( &storage ); + } +} + +} // namespace cv +} diff --git a/modules/latentsvm/src/lsvmc_matching.cpp b/modules/latentsvm/src/lsvmc_matching.cpp new file mode 100644 index 00000000000..3795d7e4a41 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_matching.cpp @@ -0,0 +1,285 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_matching.h" +#include + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif +namespace cv +{ +namespace lsvm +{ + +void sort(int n, const float* x, int* indices); + +/* +// Computation border size for feature map +// +// API +// int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by); +// INPUT +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// OUTPUT +// bx - border size (X-direction) +// by - border size (Y-direction) +// RESULT +// Error status +*/ +int computeBorderSize(int maxXBorder, int maxYBorder, int *bx, int *by) +{ + *bx = (int)ceilf(((float) maxXBorder) / 2.0f + 1.0f); + *by = (int)ceilf(((float) maxYBorder) / 2.0f + 1.0f); + return LATENT_SVM_OK; +} + +/* +// Addition nullable border to the feature map +// +// API +// int addNullableBorder(featureMap *map, int bx, int by); +// INPUT +// map - feature map +// bx - border size (X-direction) +// by - border size (Y-direction) +// OUTPUT +// RESULT +// Error status +*/ +int addNullableBorder(CvLSVMFeatureMapCaskade *map, int bx, int by) +{ + int sizeX, sizeY, i, j, k; + float *new_map; + sizeX = map->sizeX + 2 * bx; + sizeY = map->sizeY + 2 * by; + new_map = (float *)malloc(sizeof(float) * sizeX * sizeY * map->numFeatures); + for (i = 0; i < sizeX * sizeY * map->numFeatures; i++) + { + new_map[i] = 0.0; + } + for (i = by; i < map->sizeY + by; i++) + { + for (j = bx; j < map->sizeX + bx; j++) + { + for (k = 0; k < map->numFeatures; k++) + { + new_map[(i * sizeX + j) * map->numFeatures + k] = + map->map[((i - by) * map->sizeX + j - bx) * map->numFeatures + k]; + } + } + } + map->sizeX = sizeX; + map->sizeY = sizeY; + free(map->map); + map->map = new_map; + return LATENT_SVM_OK; +} + +/* +// Computation maximum filter size for each dimension +// +// API +// int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, + const int *kPartFilters, + unsigned int *maxXBorder, unsigned int *maxYBorder); +// INPUT +// filters - a set of filters (at first root filter, then part filters + and etc. 
for all components) +// kComponents - number of components +// kPartFilters - number of part filters for each component +// OUTPUT +// maxXBorder - maximum of filter size at the horizontal dimension +// maxYBorder - maximum of filter size at the vertical dimension +// RESULT +// Error status +*/ +int getMaxFilterDims(const CvLSVMFilterObjectCaskade **filters, int kComponents, + const int *kPartFilters, + unsigned int *maxXBorder, unsigned int *maxYBorder) +{ + int i, componentIndex; + *maxXBorder = filters[0]->sizeX; + *maxYBorder = filters[0]->sizeY; + componentIndex = kPartFilters[0] + 1; + for (i = 1; i < kComponents; i++) + { + if (unsigned(filters[componentIndex]->sizeX) > *maxXBorder) + { + *maxXBorder = filters[componentIndex]->sizeX; + } + if (unsigned(filters[componentIndex]->sizeY) > *maxYBorder) + { + *maxYBorder = filters[componentIndex]->sizeY; + } + componentIndex += (kPartFilters[i] + 1); + } + return LATENT_SVM_OK; +} + +void sort(int n, const float* x, int* indices) +{ + int i, j; + for (i = 0; i < n; i++) + for (j = i + 1; j < n; j++) + { + if (x[indices[j]] > x[indices[i]]) + { + //float x_tmp = x[i]; + int index_tmp = indices[i]; + //x[i] = x[j]; + indices[i] = indices[j]; + //x[j] = x_tmp; + indices[j] = index_tmp; + } + } +} + +/* +// Perform non-maximum suppression algorithm (described in original paper) +// to remove "similar" bounding boxes +// +// API +// int nonMaximumSuppression(int numBoxes, const CvPoint *points, + const CvPoint *oppositePoints, const float *score, + float overlapThreshold, + int *numBoxesOut, CvPoint **pointsOut, + CvPoint **oppositePointsOut, float **scoreOut); +// INPUT +// numBoxes - number of bounding boxes +// points - array of left top corner coordinates +// oppositePoints - array of right bottom corner coordinates +// score - array of detection scores +// overlapThreshold - threshold: bounding box is removed if overlap part + is greater than passed value +// OUTPUT +// numBoxesOut - the number of bounding boxes algorithm returns +// pointsOut - array of left top corner coordinates +// oppositePointsOut - array of right bottom corner coordinates +// scoreOut - array of detection scores +// RESULT +// Error status +*/ +int nonMaximumSuppression(int numBoxes, const CvPoint *points, + const CvPoint *oppositePoints, const float *score, + float overlapThreshold, + int *numBoxesOut, CvPoint **pointsOut, + CvPoint **oppositePointsOut, float **scoreOut) +{ + int i, j, index; + float* box_area = (float*)malloc(numBoxes * sizeof(float)); + int* indices = (int*)malloc(numBoxes * sizeof(int)); + int* is_suppressed = (int*)malloc(numBoxes * sizeof(int)); + + for (i = 0; i < numBoxes; i++) + { + indices[i] = i; + is_suppressed[i] = 0; + box_area[i] = (float)( (oppositePoints[i].x - points[i].x + 1) * + (oppositePoints[i].y - points[i].y + 1)); + } + + sort(numBoxes, score, indices); + for (i = 0; i < numBoxes; i++) + { + if (!is_suppressed[indices[i]]) + { + for (j = i + 1; j < numBoxes; j++) + { + if (!is_suppressed[indices[j]]) + { + int x1max = max(points[indices[i]].x, points[indices[j]].x); + int x2min = min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x); + int y1max = max(points[indices[i]].y, points[indices[j]].y); + int y2min = min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y); + int overlapWidth = x2min - x1max + 1; + int overlapHeight = y2min - y1max + 1; + if (overlapWidth > 0 && overlapHeight > 0) + { + float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]]; + if (overlapPart > 
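+                        // Note: overlapPart is the intersection area divided by
+                        // the area of the lower-scored box j (not IoU). Example:
+                        // boxes (0,0)-(99,99) and (50,50)-(149,149) intersect in
+                        // a 50x50 region, so overlapPart = 2500 / 10000 = 0.25
+                        // and box j survives the 0.5 threshold that
+                        // LSVMDetectorImpl::detect() passes by default.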
overlapThreshold) + { + is_suppressed[indices[j]] = 1; + } + } + } + } + } + } + + *numBoxesOut = 0; + for (i = 0; i < numBoxes; i++) + { + if (!is_suppressed[i]) (*numBoxesOut)++; + } + + *pointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint)); + *oppositePointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint)); + *scoreOut = (float *)malloc((*numBoxesOut) * sizeof(float)); + index = 0; + for (i = 0; i < numBoxes; i++) + { + if (!is_suppressed[indices[i]]) + { + (*pointsOut)[index].x = points[indices[i]].x; + (*pointsOut)[index].y = points[indices[i]].y; + (*oppositePointsOut)[index].x = oppositePoints[indices[i]].x; + (*oppositePointsOut)[index].y = oppositePoints[indices[i]].y; + (*scoreOut)[index] = score[indices[i]]; + index++; + } + + } + + free(indices); + free(box_area); + free(is_suppressed); + + return LATENT_SVM_OK; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_parser.cpp b/modules/latentsvm/src/lsvmc_parser.cpp new file mode 100644 index 00000000000..b606b4f3112 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_parser.cpp @@ -0,0 +1,1113 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" +#include +#include "string.h" +#include "_lsvmc_parser.h" +#include "_lsvmc_error.h" + +namespace cv +{ +namespace lsvm +{ + +int isMODEL (char *str); +int isP (char *str); +int isSCORE (char *str); +int isCOMP (char *str); +int isRFILTER (char *str); +int isPFILTERs (char *str); +int isPFILTER (char *str); +int isSIZEX (char *str); +int isSIZEY (char *str); +int isWEIGHTS (char *str); +int isV (char *str); +int isVx (char *str); +int isVy (char *str); +int isD (char *str); +int isDx (char *str); +int isDy (char *str); +int isDxx (char *str); +int isDyy (char *str); +int isB (char *str); +int isWEIGHTS_PCA (char *str); +int isPCA (char *str); +int isPCAcoeff (char *str); +int isCASCADE_Th (char *str); +int isHYPOTHES_PCA (char *str); +int isDEFORM_PCA (char *str); +int isHYPOTHES (char *str); +int isDEFORM (char *str); +int getTeg (char *str); + +void addFilter(CvLSVMFilterObjectCaskade *** model, int *last, int *max); + +void parserCascadeThresholds (FILE * xmlf, CvLSVMFilterObjectCaskade * model); + +void parserRFilter (FILE * xmlf, int p, int pca, CvLSVMFilterObjectCaskade * model, float *b); + +void parserV (FILE * xmlf, int /*p*/, CvLSVMFilterObjectCaskade * model); + +void parserD (FILE * xmlf, int /*p*/, CvLSVMFilterObjectCaskade * model); + +void parserPFilter (FILE * xmlf, int p, int pca, int /*N_path*/, CvLSVMFilterObjectCaskade * model); + +void parserPFilterS (FILE * xmlf, int p, int pca, CvLSVMFilterObjectCaskade *** model, int *last, int *max); + +void parserComp (FILE * xmlf, int p, int pca, int *N_comp, CvLSVMFilterObjectCaskade *** model, float *b, int *last, int *max); + +void parserModel(FILE * xmlf, CvLSVMFilterObjectCaskade *** model, int *last, int *max, int **comp, float **b, int *count, float * score, float** PCAcoeff); + +void LSVMparser(const char * filename, + CvLSVMFilterObjectCaskade *** model, + int *last, + int *max, + int **comp, + float **b, + int *count, + float * score, + float** PCAcoeff); + +int isMODEL (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return MODEL; + if(strcmp(etag, str) == 0)return EMODEL; + return 0; +} +int isP (char *str){ + char stag [] = "
"; + char etag [] = "
"; + if(strcmp(stag, str) == 0)return P; + if(strcmp(etag, str) == 0)return EP; + return 0; +} +int isSCORE (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return SCORE; + if(strcmp(etag, str) == 0)return ESCORE; + return 0; +} +int isCOMP (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return COMP; + if(strcmp(etag, str) == 0)return ECOMP; + return 0; +} +int isRFILTER (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return RFILTER; + if(strcmp(etag, str) == 0)return ERFILTER; + return 0; +} +int isPFILTERs (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return PFILTERs; + if(strcmp(etag, str) == 0)return EPFILTERs; + return 0; +} +int isPFILTER (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return PFILTER; + if(strcmp(etag, str) == 0)return EPFILTER; + return 0; +} +int isSIZEX (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return SIZEX; + if(strcmp(etag, str) == 0)return ESIZEX; + return 0; +} +int isSIZEY (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return SIZEY; + if(strcmp(etag, str) == 0)return ESIZEY; + return 0; +} +int isWEIGHTS (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return WEIGHTS; + if(strcmp(etag, str) == 0)return EWEIGHTS; + return 0; +} +int isV (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return TAGV; + if(strcmp(etag, str) == 0)return ETAGV; + return 0; +} +int isVx (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Vx; + if(strcmp(etag, str) == 0)return EVx; + return 0; +} +int isVy (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Vy; + if(strcmp(etag, str) == 0)return EVy; + return 0; +} +int isD (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return TAGD; + if(strcmp(etag, str) == 0)return ETAGD; + return 0; +} +int isDx (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Dx; + if(strcmp(etag, str) == 0)return EDx; + return 0; +} +int isDy (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Dy; + if(strcmp(etag, str) == 0)return EDy; + return 0; +} +int isDxx (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Dxx; + if(strcmp(etag, str) == 0)return EDxx; + return 0; +} +int isDyy (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return Dyy; + if(strcmp(etag, str) == 0)return EDyy; + return 0; +} +int isB (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return BTAG; + if(strcmp(etag, str) == 0)return EBTAG; + return 0; +} + +int isWEIGHTS_PCA (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return WEIGHTSPCA; + if(strcmp(etag, str) == 0)return EWEIGHTSPCA; + return 0; +} + +int isPCA (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return PCA; + if(strcmp(etag, str) == 0)return EPCA; + return 0; +} + +int isPCAcoeff (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return PCACOEFF; + if(strcmp(etag, str) == 0)return EPCACOEFF; + return 0; +} + +int isCASCADE_Th (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return 
CASCADE_Th; + if(strcmp(etag, str) == 0)return ECASCADE_Th; + return 0; +} + +int isHYPOTHES_PCA (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return HYPOTHES_PCA; + if(strcmp(etag, str) == 0)return EHYPOTHES_PCA; + return 0; +} +int isDEFORM_PCA (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return DEFORM_PCA; + if(strcmp(etag, str) == 0)return EDEFORM_PCA; + return 0; +} +int isHYPOTHES (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return HYPOTHES; + if(strcmp(etag, str) == 0)return EHYPOTHES; + return 0; +} +int isDEFORM (char *str){ + char stag [] = ""; + char etag [] = ""; + if(strcmp(stag, str) == 0)return DEFORM; + if(strcmp(etag, str) == 0)return EDEFORM; + return 0; +} + +int getTeg(char *str){ + int sum = 0; + sum = isMODEL (str)+ + isP (str)+ + isSCORE (str)+ + isCOMP (str)+ + isRFILTER (str)+ + isPFILTERs (str)+ + isPFILTER (str)+ + isSIZEX (str)+ + isSIZEY (str)+ + isWEIGHTS (str)+ + isV (str)+ + isVx (str)+ + isVy (str)+ + isD (str)+ + isDx (str)+ + isDy (str)+ + isDxx (str)+ + isDyy (str)+ + isB (str)+ + isPCA (str)+ + isCASCADE_Th (str)+ + isHYPOTHES_PCA(str)+ + isDEFORM_PCA (str)+ + isHYPOTHES (str)+ + isDEFORM (str)+ + isWEIGHTS_PCA (str)+ + isPCAcoeff (str) + ; + + return sum; +} + +void addFilter(CvLSVMFilterObjectCaskade *** model, int *last, int *max) +{ + CvLSVMFilterObjectCaskade ** nmodel; + int i; + (*last) ++; + if((*last) >= (*max)){ + (*max) += 10; + nmodel = (CvLSVMFilterObjectCaskade **)malloc(sizeof(CvLSVMFilterObjectCaskade *) * (*max)); + for(i = 0; i < *last; i++){ + nmodel[i] = (* model)[i]; + } + free(* model); + (*model) = nmodel; + } + (*model) [(*last)] = (CvLSVMFilterObjectCaskade *)malloc(sizeof(CvLSVMFilterObjectCaskade)); + (*model) [(*last)]->Hypothesis = 0.0f; + (*model) [(*last)]->Deformation = 0.0f; + (*model) [(*last)]->Hypothesis_PCA = 0.0f; + (*model) [(*last)]->Deformation_PCA = 0.0f; + +} + +//############################################## +void parserCascadeThresholds (FILE * xmlf, CvLSVMFilterObjectCaskade * model){ + int st = 0; + int tag; + int tagVal; + char ch; + int i,j; + char buf[1024]; + char tagBuf[1024]; + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char) fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == ECASCADE_Th){ + return; + } + if(tagVal == HYPOTHES_PCA){ + st = 1; + i = 0; + } + if(tagVal == EHYPOTHES_PCA){ + st = 0; + buf[i] = '\0'; + model->Hypothesis_PCA =(float) atof(buf); + } + if(tagVal == DEFORM_PCA){ + st = 1; + i = 0; + } + if(tagVal == EDEFORM_PCA){ + st = 0; + buf[i] = '\0'; + model->Deformation_PCA =(float) atof(buf); + } + if(tagVal == HYPOTHES){ + st = 1; + i = 0; + } + if(tagVal == EHYPOTHES){ + st = 0; + buf[i] = '\0'; + model->Hypothesis = (float)atof(buf); + } + if(tagVal == DEFORM){ + st = 1; + i = 0; + } + if(tagVal == EDEFORM){ + st = 0; + buf[i] = '\0'; + model->Deformation = (float)atof(buf); + } + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} +//############################################## + +void parserRFilter (FILE * xmlf, int p, int pca, CvLSVMFilterObjectCaskade * model, float *b){ + int st = 0; + int sizeX = 0, sizeY = 0; + int tag; + int tagVal; + char ch; + int i,j,ii; + char buf[1024]; + char tagBuf[1024]; + double *data; + 
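+    // Informal summary of this hand-rolled parser: characters are read one at
+    // a time; everything between '<' and '>' is accumulated in tagBuf and
+    // classified by getTeg(), while plain text between a start tag and its end
+    // tag is accumulated in buf (st == 1) and converted with atoi()/atof().
+    // Filter coefficients are not stored as text: when a WEIGHTS or WEIGHTSPCA
+    // tag is seen, sizeX * sizeY * p (respectively * pca) raw doubles are read
+    // straight from the stream with fread() and cast to float.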
//printf("\n"); + + model->V.x = 0; + model->V.y = 0; + model->V.l = 0; + model->fineFunction[0] = 0.0; + model->fineFunction[1] = 0.0; + model->fineFunction[2] = 0.0; + model->fineFunction[3] = 0.0; + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == ERFILTER){ + //printf("\n"); + return; + } + if(tagVal == SIZEX){ + st = 1; + i = 0; + } + if(tagVal == ESIZEX){ + st = 0; + buf[i] = '\0'; + sizeX = atoi(buf); + model->sizeX = sizeX; + //printf("%d\n", sizeX); + } + if(tagVal == SIZEY){ + st = 1; + i = 0; + } + if(tagVal == ESIZEY){ + st = 0; + buf[i] = '\0'; + sizeY = atoi(buf); + model->sizeY = sizeY; + //printf("%d\n", sizeY); + } + if(tagVal == WEIGHTS){ + data = (double *)malloc( sizeof(double) * p * sizeX * sizeY); + CV_Assert(fread(data, sizeof(double), p * sizeX * sizeY, xmlf)); + model->H = (float *)malloc(sizeof(float)* p * sizeX * sizeY); + for(ii = 0; ii < p * sizeX * sizeY; ii++){ + model->H[ii] = (float)data[ii]; + } + free(data); + } + if(tagVal == WEIGHTSPCA){ + data = (double *)malloc( sizeof(double) * pca * sizeX * sizeY); + CV_Assert(fread(data, sizeof(double), pca * sizeX * sizeY, xmlf)); + model->H_PCA = (float *)malloc(sizeof(float)* pca * sizeX * sizeY); + for(ii = 0; ii < pca * sizeX * sizeY; ii++){ + model->H_PCA[ii] = (float)data[ii]; + } + free(data); + } + + if(tagVal == CASCADE_Th){ + parserCascadeThresholds (xmlf, model); + } + + if(tagVal == BTAG){ + st = 1; + i = 0; + } + if(tagVal == EBTAG){ + st = 0; + buf[i] = '\0'; + *b =(float) atof(buf); + //printf("%f\n", *b); + } + + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} + +void parserV (FILE * xmlf, int /*p*/, CvLSVMFilterObjectCaskade * model){ + int st = 0; + int tag; + int tagVal; + char ch; + int i,j; + char buf[1024]; + char tagBuf[1024]; + //printf(" \n"); + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char) fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == ETAGV){ + //printf(" \n"); + return; + } + if(tagVal == Vx){ + st = 1; + i = 0; + } + if(tagVal == EVx){ + st = 0; + buf[i] = '\0'; + model->V.x = atoi(buf); + //printf(" %d\n", model->V.x); + } + if(tagVal == Vy){ + st = 1; + i = 0; + } + if(tagVal == EVy){ + st = 0; + buf[i] = '\0'; + model->V.y = atoi(buf); + //printf(" %d\n", model->V.y); + } + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} +void parserD (FILE * xmlf, int /*p*/, CvLSVMFilterObjectCaskade * model){ + int st = 0; + int tag; + int tagVal; + char ch; + int i,j; + char buf[1024]; + char tagBuf[1024]; + //printf(" \n"); + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == ETAGD){ + //printf(" \n"); + return; + } + if(tagVal == Dx){ + st = 1; + i = 0; + } + if(tagVal == EDx){ + st = 0; + buf[i] = '\0'; + + model->fineFunction[0] = (float)atof(buf); + //printf(" %f\n", model->fineFunction[0]); + } + if(tagVal == Dy){ + st = 1; + i = 0; + } + 
if(tagVal == EDy){ + st = 0; + buf[i] = '\0'; + + model->fineFunction[1] = (float)atof(buf); + //printf(" %f\n", model->fineFunction[1]); + } + if(tagVal == Dxx){ + st = 1; + i = 0; + } + if(tagVal == EDxx){ + st = 0; + buf[i] = '\0'; + + model->fineFunction[2] = (float)atof(buf); + //printf(" %f\n", model->fineFunction[2]); + } + if(tagVal == Dyy){ + st = 1; + i = 0; + } + if(tagVal == EDyy){ + st = 0; + buf[i] = '\0'; + + model->fineFunction[3] = (float)atof(buf); + //printf(" %f\n", model->fineFunction[3]); + } + + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} + +void parserPFilter (FILE * xmlf, int p, int pca, int /*N_path*/, CvLSVMFilterObjectCaskade * model){ + int st = 0; + int sizeX = 0, sizeY = 0; + int tag; + int tagVal; + char ch; + int i,j, ii; + char buf[1024]; + char tagBuf[1024]; + double *data; + //printf(" (%d)\n", N_path); + + model->V.x = 0; + model->V.y = 0; + model->V.l = 0; + model->fineFunction[0] = 0.0f; + model->fineFunction[1] = 0.0f; + model->fineFunction[2] = 0.0f; + model->fineFunction[3] = 0.0f; + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == EPFILTER){ + //printf("\n"); + return; + } + + if(tagVal == TAGV){ + parserV(xmlf, p, model); + } + if(tagVal == TAGD){ + parserD(xmlf, p, model); + } + if(tagVal == SIZEX){ + st = 1; + i = 0; + } + if(tagVal == ESIZEX){ + st = 0; + buf[i] = '\0'; + sizeX = atoi(buf); + model->sizeX = sizeX; + //printf("%d\n", sizeX); + } + if(tagVal == SIZEY){ + st = 1; + i = 0; + } + if(tagVal == ESIZEY){ + st = 0; + buf[i] = '\0'; + sizeY = atoi(buf); + model->sizeY = sizeY; + //printf("%d\n", sizeY); + } + if(tagVal == WEIGHTS){ + data = (double *)malloc( sizeof(double) * p * sizeX * sizeY); + CV_Assert(fread(data, sizeof(double), p * sizeX * sizeY, xmlf)); + model->H = (float *)malloc(sizeof(float)* p * sizeX * sizeY); + for(ii = 0; ii < p * sizeX * sizeY; ii++){ + model->H[ii] = (float)data[ii]; + } + free(data); + } + if(tagVal == WEIGHTSPCA){ + data = (double *)malloc( sizeof(double) * pca * sizeX * sizeY); + CV_Assert(fread(data, sizeof(double), pca * sizeX * sizeY, xmlf)); + model->H_PCA = (float *)malloc(sizeof(float)* pca * sizeX * sizeY); + for(ii = 0; ii < pca * sizeX * sizeY; ii++){ + model->H_PCA[ii] = (float)data[ii]; + } + free(data); + } + if(tagVal == CASCADE_Th){ + parserCascadeThresholds (xmlf, model); + } + if(tagVal == EWEIGHTS){ + //printf("WEIGHTS OK\n"); + } + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} +void parserPFilterS (FILE * xmlf, int p, int pca, CvLSVMFilterObjectCaskade *** model, int *last, int *max){ + int st = 0; + int N_path = 0; + int tag; + int tagVal; + char ch; + int j; + char tagBuf[1024]; + //printf("\n"); + + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == EPFILTERs){ + //printf("\n"); + return; + } + if(tagVal == PFILTER){ + addFilter(model, last, max); + parserPFilter (xmlf, p, pca, N_path, (*model)[*last]); + N_path++; + } + tag = 0; + }else{ + if((tag != 0) || (st != 1)){ + tagBuf[j] = ch; j++; + } + } + } + } 
+} +void parserComp (FILE * xmlf, int p, int pca, int *N_comp, CvLSVMFilterObjectCaskade *** model, float *b, int *last, int *max){ + int st = 0; + int tag; + int tagVal; + char ch; + int j; + char tagBuf[1024]; + //printf(" %d\n", *N_comp); + + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == ECOMP){ + (*N_comp) ++; + return; + } + if(tagVal == RFILTER){ + addFilter(model, last, max); + parserRFilter (xmlf, p, pca, (*model)[*last],b); + } + if(tagVal == PFILTERs){ + parserPFilterS (xmlf, p, pca, model, last, max); + } + tag = 0; + }else{ + if((tag != 0) || (st != 1)){ + tagBuf[j] = ch; j++; + } + } + } + } +} +void parserModel(FILE * xmlf, CvLSVMFilterObjectCaskade *** model, int *last, int *max, int **comp, float **b, int *count, float * score, float** PCAcoeff){ + int p = 0, pca = 0; + int N_comp = 0; + int * cmp; + float *bb; + int st = 0; + int tag; + int tagVal; + char ch; + int i,j, ii = 0, jj; + char buf[1024]; + char tagBuf[1024]; + double *data; + //printf("\n"); + + i = 0; + j = 0; + st = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char)fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + + tagVal = getTeg(tagBuf); + + if(tagVal == EMODEL){ + //printf("\n"); + for(ii = 0; ii <= *last; ii++){ + (*model)[ii]->numFeatures = p; + } + * count = N_comp; + return; + } + if(tagVal == COMP){ + if(N_comp == 0){ + cmp = (int *)malloc(sizeof(int)); + bb = (float *)malloc(sizeof(float)); + * comp = cmp; + * b = bb; + * count = N_comp + 1; + } else { + cmp = (int *)malloc(sizeof(int) * (N_comp + 1)); + bb = (float *)malloc(sizeof(float) * (N_comp + 1)); + for(ii = 0; ii < N_comp; ii++){ + cmp[ii] = (* comp)[ii]; + bb [ii] = (* b )[ii]; + } + free(* comp); + free(* b ); + * comp = cmp; + * b = bb; + * count = N_comp + 1; + } + parserComp(xmlf, p, pca, &N_comp, model, &((*b)[N_comp]), last, max); + cmp[N_comp - 1] = *last; + } + if(tagVal == P){ + st = 1; + i = 0; + } + if(tagVal == EP){ + st = 0; + buf[i] = '\0'; + p = atoi(buf); + //printf("
%d
\n", p); + } + if(tagVal == PCA){ + st = 1; + i = 0; + } + if(tagVal == EPCA){ + st = 0; + buf[i] = '\0'; + pca = atoi(buf); + //printf("%d\n", p); + } + if(tagVal == SCORE){ + st = 1; + i = 0; + } + if(tagVal == PCACOEFF){ + st = 0; + i = 0; + p--; + data = (double *)malloc( sizeof(double) * p * p); + (*PCAcoeff) = (float *)malloc( sizeof(float) * p * p); + CV_Assert(fread(data, sizeof(double), p * p, xmlf)); + for(jj = 0; jj < p * p; jj++){ + (*PCAcoeff)[jj] = (float)data[jj]; + } + free(data); + p++; + } + if(tagVal == EPCACOEFF){ + st = 0; + //printf("%d\n", p); + } + if(tagVal == SCORE){ + st = 1; + i = 0; + } + if(tagVal == ESCORE){ + st = 0; + buf[i] = '\0'; + *score = (float)atof(buf); + //printf("%f\n", score); + } + tag = 0; + i = 0; + }else{ + if((tag == 0)&& (st == 1)){ + buf[i] = ch; i++; + }else{ + tagBuf[j] = ch; j++; + } + } + } + } +} + +void LSVMparser(const char * filename, + CvLSVMFilterObjectCaskade *** model, + int *last, + int *max, + int **comp, + float **b, + int *count, + float * score, + float** PCAcoeff) +{ + int tag; + char ch; + int j; + FILE *xmlf; + char tagBuf[1024]; + + (*max) = 10; + (*last) = -1; + (*model) = (CvLSVMFilterObjectCaskade ** )malloc((sizeof(CvLSVMFilterObjectCaskade * )) * (*max)); + + //printf("parse : %s\n", filename); + xmlf = fopen(filename, "rb"); + + j = 0; + tag = 0; + while(!feof(xmlf)){ + ch = (char) fgetc( xmlf ); + if(ch == '<'){ + tag = 1; + j = 1; + tagBuf[j - 1] = ch; + }else { + if(ch == '>'){ + tag = 0; + tagBuf[j ] = ch; + tagBuf[j + 1] = '\0'; + if(getTeg(tagBuf) == MODEL){ + parserModel(xmlf, model, last, max, comp, b, count, score, PCAcoeff); + } + }else{ + if(tag != 0) + { + tagBuf[j] = ch; j++; + } + } + } + } + fclose(xmlf); +} + +int loadModel( + const char *modelPath, + + CvLSVMFilterObjectCaskade ***filters, + int *kFilters, + int *kComponents, + int **kPartFilters, + float **b, + float *scoreThreshold, + float ** PCAcoeff){ + int last; + int max; + int *comp; + int count; + int i; + float score; + + LSVMparser(modelPath, filters, &last, &max, &comp, b, &count, &score, PCAcoeff); + (*kFilters) = last + 1; + (*kComponents) = count; + (*scoreThreshold) = (float) score; + + (*kPartFilters) = (int *)malloc(sizeof(int) * count); + + for(i = 1; i < count;i++){ + (*kPartFilters)[i] = (comp[i] - comp[i - 1]) - 1; + } + (*kPartFilters)[0] = comp[0]; + + + for(i = 0; i < (*kFilters);i++){ + (*(filters))[i]->deltaX = 5;// maxX; + (*(filters))[i]->deltaY = 5;// maxY; + } + + return 0; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_resizeimg.cpp b/modules/latentsvm/src/lsvmc_resizeimg.cpp new file mode 100644 index 00000000000..0fcd503d747 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_resizeimg.cpp @@ -0,0 +1,71 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_resizeimg.h" +#include +#include +#include + +namespace cv +{ +namespace lsvm +{ + +IplImage* resize_opencv(IplImage* img, float scale) +{ + IplImage* imgTmp; + + int W, H, tW, tH; + + W = img->width; + H = img->height; + + tW = (int)(((float)W) * scale + 0.5); + tH = (int)(((float)H) * scale + 0.5); + + imgTmp = cvCreateImage(cvSize(tW , tH), img->depth, img->nChannels); + cvResize(img, imgTmp, CV_INTER_AREA); + + return imgTmp; +} +} +} diff --git a/modules/latentsvm/src/lsvmc_routine.cpp b/modules/latentsvm/src/lsvmc_routine.cpp new file mode 100644 index 00000000000..1c7152c9fb0 --- /dev/null +++ b/modules/latentsvm/src/lsvmc_routine.cpp @@ -0,0 +1,129 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "_lsvmc_routine.h" +namespace cv +{ +namespace lsvm +{ +int allocFilterObject(CvLSVMFilterObjectCaskade **obj, const int sizeX, + const int sizeY, const int numFeatures) +{ + int i; + (*obj) = (CvLSVMFilterObjectCaskade *)malloc(sizeof(CvLSVMFilterObjectCaskade)); + (*obj)->sizeX = sizeX; + (*obj)->sizeY = sizeY; + (*obj)->numFeatures = numFeatures; + (*obj)->fineFunction[0] = 0.0f; + (*obj)->fineFunction[1] = 0.0f; + (*obj)->fineFunction[2] = 0.0f; + (*obj)->fineFunction[3] = 0.0f; + (*obj)->V.x = 0; + (*obj)->V.y = 0; + (*obj)->V.l = 0; + (*obj)->H = (float *) malloc(sizeof (float) * + (sizeX * sizeY * numFeatures)); + for(i = 0; i < sizeX * sizeY * numFeatures; i++) + { + (*obj)->H[i] = 0.0f; + } + return LATENT_SVM_OK; +} +int freeFilterObject (CvLSVMFilterObjectCaskade **obj) +{ + if(*obj == NULL) return LATENT_SVM_MEM_NULL; + free((*obj)->H); + free(*obj); + (*obj) = NULL; + return LATENT_SVM_OK; +} + +int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, + const int sizeY, const int numFeatures) +{ + int i; + (*obj) = (CvLSVMFeatureMapCaskade *)malloc(sizeof(CvLSVMFeatureMapCaskade)); + (*obj)->sizeX = sizeX; + (*obj)->sizeY = sizeY; + (*obj)->numFeatures = numFeatures; + (*obj)->map = (float *) malloc(sizeof (float) * + (sizeX * sizeY * numFeatures)); + for(i = 0; i < sizeX * sizeY * numFeatures; i++) + { + (*obj)->map[i] = 0.0f; + } + return LATENT_SVM_OK; +} +int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj) +{ + if(*obj == NULL) return LATENT_SVM_MEM_NULL; + free((*obj)->map); + free(*obj); + (*obj) = NULL; + return LATENT_SVM_OK; +} + +int allocFeaturePyramidObject(CvLSVMFeaturePyramidCaskade **obj, + const int numLevels) +{ + (*obj) = (CvLSVMFeaturePyramidCaskade *)malloc(sizeof(CvLSVMFeaturePyramidCaskade)); + (*obj)->numLevels = numLevels; + (*obj)->pyramid = (CvLSVMFeatureMapCaskade **)malloc( + sizeof(CvLSVMFeatureMapCaskade *) * numLevels); + return LATENT_SVM_OK; +} + +int freeFeaturePyramidObject (CvLSVMFeaturePyramidCaskade **obj) +{ + int i; + if(*obj == NULL) return LATENT_SVM_MEM_NULL; + for(i = 0; i < (*obj)->numLevels; i++) + { + freeFeatureMapObject(&((*obj)->pyramid[i])); + } + free((*obj)->pyramid); + free(*obj); + (*obj) = NULL; + return LATENT_SVM_OK; +} +} +} diff --git a/modules/latentsvm/src/precomp.cpp b/modules/latentsvm/src/precomp.cpp new file mode 100644 index 00000000000..e65b780222b --- /dev/null +++ b/modules/latentsvm/src/precomp.cpp @@ -0,0 +1,44 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +/* End of file. */ diff --git a/modules/datasets/src/precomp.hpp b/modules/latentsvm/src/precomp.hpp similarity index 77% rename from modules/datasets/src/precomp.hpp rename to modules/latentsvm/src/precomp.hpp index da5bf8b8323..53a5686f486 100644 --- a/modules/datasets/src/precomp.hpp +++ b/modules/latentsvm/src/precomp.hpp @@ -10,7 +10,7 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2014, Itseez Inc, all rights reserved. +// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -29,7 +29,7 @@ // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Itseez Inc or contributors be liable for any direct, +// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused @@ -42,9 +42,26 @@ #ifndef __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__ -#include -#include // atoi, atof +#ifdef HAVE_CVCONFIG_H +#include "cvconfig.h" +#endif + +#ifdef __cplusplus +#include +#include +#endif + +#include "opencv2/latentsvm.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/imgproc/imgproc_c.h" +#include "opencv2/core/core_c.h" -#include +#include "opencv2/opencv_modules.hpp" +#include "opencv2/highgui/highgui_c.h" + + +#ifdef HAVE_TEGRA_OPTIMIZATION +#include "opencv2/objdetect/objdetect_tegra.hpp" +#endif #endif diff --git a/modules/latentsvm/test/test_latentsvmdetector_caskade.cpp b/modules/latentsvm/test/test_latentsvmdetector_caskade.cpp new file mode 100644 index 00000000000..bf14793b2d2 --- /dev/null +++ b/modules/latentsvm/test/test_latentsvmdetector_caskade.cpp @@ -0,0 +1,219 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" + +#include +//#include "opencv2/highgui/highgui_c.h" + +#ifdef HAVE_CVCONFIG_H +#include "cvconfig.h" +#endif + +#ifdef HAVE_TBB +#include "tbb/task_scheduler_init.h" +#endif + +using namespace cv; + +const float score_thr = 0.05f; + + +class LatentSVMDetectorCaskadeTest : public cvtest::BaseTest +{ +protected: + void run(int); +}; + +static void writeDetections( FileStorage& fs, const std::string& nodeName, const std::vector& detections ) +{ + fs << nodeName << "["; + for( size_t i = 0; i < detections.size(); i++ ) //FIXME operator << + { + lsvm::LSVMDetector::ObjectDetection const &d = detections[i]; + fs << d.rect.x << d.rect.y << d.rect.width << d.rect.height + << d.score << d.classID; + } + fs << "]"; +} + +static void readDetections( FileStorage fs, const std::string& nodeName, + std::vector& detections ) +{ + detections.clear(); + + FileNode fn = fs.root()[nodeName]; + FileNodeIterator fni = fn.begin(); + while( fni != fn.end() ) + { + lsvm::LSVMDetector::ObjectDetection d; + fni >> d.rect.x >> d.rect.y >> d.rect.width >> d.rect.height + >> d.score >> d.classID; + detections.push_back( d ); + } +} + +static inline bool isEqualCaskad( const lsvm::LSVMDetector::ObjectDetection& d1, + const lsvm::LSVMDetector::ObjectDetection& d2, int eps, float threshold) +{ + return ( + std::abs(d1.rect.x - d2.rect.x) <= eps + && std::abs(d1.rect.y - d2.rect.y) <= eps + && std::abs(d1.rect.width - d2.rect.width) <= eps + && std::abs(d1.rect.height - d2.rect.height) <= eps + && (d1.classID == d2.classID) + && std::abs(d1.score - d2.score) <= threshold + ); +} + + +bool compareResults( const std::vector& calc, + const std::vector& valid, int eps, float threshold) +{ + if( calc.size() != valid.size() ) + return false; + + for( size_t i = 0; i < calc.size(); i++ ) + { + lsvm::LSVMDetector::ObjectDetection const &c = calc[i]; + lsvm::LSVMDetector::ObjectDetection const &v = valid[i]; + + if( !isEqualCaskad(c, v, eps, threshold) ) + { + std::cerr << "Expected: " << v.rect << " class=" << v.classID << " score=" << v.score << std::endl; + std::cerr << "Actual: " << c.rect << " class=" << c.classID << " score=" << c.score << std::endl; + return false; + } + } + return true; +} + +void LatentSVMDetectorCaskadeTest::run( int /* start_from */) +{ + std::string img_path_cat = std::string(ts->get_data_path()) + "cat.png"; + std::string img_path_cars = std::string(ts->get_data_path()) + "cars.png"; + + std::string model_path_cat = std::string(ts->get_data_path()) + "models_VOC2007_caskade/cat.xml"; + std::string model_path_car = std::string(ts->get_data_path()) + "models_VOC2007_caskade/car.xml"; + + std::string true_res_path = std::string(ts->get_data_path()) + "results_caskad.xml"; + + +#ifdef HAVE_TBB + int numThreads = 2; +#endif + + Mat image_cat = imread( img_path_cat ); + Mat image_cars = imread( img_path_cars ); + if( image_cat.empty() || image_cars.empty() ) + { + ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA ); + return; + } + + // We will test 2 cases: + // detector1 - to test case of one class 'cat' + // detector12 - to test case of two (several) classes 'cat' and car + + // Load detectors + cv::Ptr detector1 = lsvm::LSVMDetector::create(std::vector(1,model_path_cat)); + + std::vector models_pathes(2); + models_pathes[0] = model_path_cat; + models_pathes[1] = model_path_car; + cv::Ptr detector12 = lsvm::LSVMDetector::create(models_pathes); + + if( detector1->isEmpty() || detector12->isEmpty() || detector12->getClassCount() != 2 ) + { + 
ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA ); + return; + } + + // 1. Test method detect + // Run detectors + std::vector detections1_cat, detections12_cat, detections12_cars; + detector1->detect( image_cat, detections1_cat, 0.5); + detector12->detect( image_cat, detections12_cat, 0.5); + detector12->detect( image_cars, detections12_cars, 0.5); + + // Load true results + FileStorage fs( true_res_path, FileStorage::READ ); + if( fs.isOpened() ) + { + std::vector true_detections1_cat, true_detections12_cat, true_detections12_cars; + readDetections( fs, "detections1_cat", true_detections1_cat ); + readDetections( fs, "detections12_cat", true_detections12_cat ); + readDetections( fs, "detections12_cars", true_detections12_cars ); + + if( !compareResults(detections1_cat, true_detections1_cat, 1, score_thr) ) + { + std::cerr << "Results of detector1 are invalid on image cat.png" << std::endl; + ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH ); + } + if( !compareResults(detections12_cat, true_detections12_cat, 1, score_thr) ) + { + std::cerr << "Results of detector12 are invalid on image cat.png" << std::endl; + ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH ); + } + if( !compareResults(detections12_cars, true_detections12_cars, 1, score_thr) ) + { + std::cerr << "Results of detector12 are invalid on image cars.png" << std::endl; + ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH ); + } + } + else + { + fs.open( true_res_path, FileStorage::WRITE ); + if( fs.isOpened() ) + { + writeDetections( fs, "detections1_cat", detections1_cat ); + writeDetections( fs, "detections12_cat", detections12_cat ); + writeDetections( fs, "detections12_cars", detections12_cars ); + } + else + std::cerr << "File " << true_res_path << " cann't be opened to save test results" << std::endl; + } + + ts->set_failed_test_info( cvtest::TS::OK); +} + +TEST(Objdetect_LatentSVMDetectorCaskade_cpp, regression) { LatentSVMDetectorCaskadeTest test; test.safe_run(); } diff --git a/modules/latentsvm/test/test_main.cpp b/modules/latentsvm/test/test_main.cpp new file mode 100644 index 00000000000..6d0975e242e --- /dev/null +++ b/modules/latentsvm/test/test_main.cpp @@ -0,0 +1,3 @@ +#include "test_precomp.hpp" + +CV_TEST_MAIN("latentsvm") diff --git a/modules/latentsvm/test/test_precomp.cpp b/modules/latentsvm/test/test_precomp.cpp new file mode 100644 index 00000000000..5956e13e3e9 --- /dev/null +++ b/modules/latentsvm/test/test_precomp.cpp @@ -0,0 +1 @@ +#include "test_precomp.hpp" diff --git a/modules/latentsvm/test/test_precomp.hpp b/modules/latentsvm/test/test_precomp.hpp new file mode 100644 index 00000000000..d32f480e179 --- /dev/null +++ b/modules/latentsvm/test/test_precomp.hpp @@ -0,0 +1,17 @@ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/latentsvm.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" + +#endif diff --git a/modules/latentsvm/testdata/latentsvm/cars.png b/modules/latentsvm/testdata/latentsvm/cars.png new file mode 100644 index 00000000000..2d9bdf260bb Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/cars.png differ diff --git a/modules/latentsvm/testdata/latentsvm/cat.png b/modules/latentsvm/testdata/latentsvm/cat.png new file mode 100644 
index 00000000000..31f4e2e581d Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/cat.png differ diff --git a/modules/latentsvm/testdata/latentsvm/mat2xml.m b/modules/latentsvm/testdata/latentsvm/mat2xml.m new file mode 100644 index 00000000000..61bc9bf96f0 --- /dev/null +++ b/modules/latentsvm/testdata/latentsvm/mat2xml.m @@ -0,0 +1,99 @@ +function [] = mat2xml(fname_in, fname_out) +load(fname_in); +num_feat = 31; +rootfilters = []; +for i = 1:length(model.rootfilters) + rootfilters{i} = model.rootfilters{i}.w; +end +partfilters = []; +for i = 1:length(model.partfilters) + partfilters{i} = model.partfilters{i}.w; +end +for c = 1:model.numcomponents + ridx{c} = model.components{c}.rootindex; + oidx{c} = model.components{c}.offsetindex; + root{c} = model.rootfilters{ridx{c}}.w; + rsize{c} = [size(root{c},1) size(root{c},2)]; + numparts{c} = length(model.components{c}.parts); + for j = 1:numparts{c} + pidx{c,j} = model.components{c}.parts{j}.partindex; + didx{c,j} = model.components{c}.parts{j}.defindex; + part{c,j} = model.partfilters{pidx{c,j}}.w; + psize{c,j} = [size(part{c,j},1) size(part{c,j},2)]; + % reverse map from partfilter index to (component, part#) + % rpidx{pidx{c,j}} = [c j]; + end +end + +f = fopen(fname_out, 'wb'); +fprintf(f, '\n'); +fprintf(f, '\t\n'); +fprintf(f, '\t%d\n', model.numcomponents); +fprintf(f, '\t\n'); +fprintf(f, '\t
%d
\n', num_feat); +fprintf(f, '\t\n'); +fprintf(f, '\t%.16f\n', model.thresh); +for c = 1:model.numcomponents + fprintf(f, '\t\n'); + fprintf(f, '\t\t\n'); + fprintf(f, '\t\t\n'); + fprintf(f, '\t\t\t\n'); + rootfilter = root{c}; + fprintf(f, '\t\t\t%d\n', rsize{c}(2)); + fprintf(f, '\t\t\t%d\n', rsize{c}(1)); + fprintf(f, '\t\t\t\n'); + fprintf(f, '\t\t\t'); + for jj = 1:rsize{c}(1) + for ii = 1:rsize{c}(2) + for kk = 1:num_feat + fwrite(f, rootfilter(jj, ii, kk), 'double'); + end + end + end + fprintf(f, '\t\t\t\n'); + fprintf(f, '\t\t\t\n'); + fprintf(f, '\t\t\t%.16f\n', model.offsets{1,c}.w); + fprintf(f, '\t\t\n\n'); + fprintf(f, '\t\t\n'); + fprintf(f, '\t\t\n'); + fprintf(f, '\t\t\t%d\n', numparts{c}); + + for j=1:numparts{c} + fprintf(f, '\t\t\t\n', j); + fprintf(f, '\t\t\t\n'); + partfilter = part{c,j}; + anchor = model.defs{didx{c,j}}.anchor; + def = model.defs{didx{c,j}}.w; + + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t%d\n', psize{c,j}(2)); + fprintf(f, '\t\t\t\t%d\n', psize{c,j}(1)); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t'); + for jj = 1:psize{c,j}(1) + for ii = 1:psize{c,j}(2) + for kk = 1:num_feat + fwrite(f, partfilter(jj, ii, kk), 'double'); + end + end + end + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\t%d\n', anchor(1)); + fprintf(f, '\t\t\t\t\t%d\n', anchor(2)); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\t\t%.16f\n', def(2)); + fprintf(f, '\t\t\t\t\t%.16f\n', def(4)); + fprintf(f, '\t\t\t\t\t%.16f\n', def(1)); + fprintf(f, '\t\t\t\t\t%.16f\n', def(3)); + fprintf(f, '\t\t\t\t\n'); + fprintf(f, '\t\t\t\n'); + end + fprintf(f, '\t\t\n'); + fprintf(f, '\t\n'); +end +fprintf(f, '
'); +fclose(f); diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/aeroplane.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/aeroplane.xml new file mode 100644 index 00000000000..4d37fe24874 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/aeroplane.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bicycle.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bicycle.xml new file mode 100644 index 00000000000..7ec2e3cfce0 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bicycle.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bird.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bird.xml new file mode 100644 index 00000000000..3d18a7624d5 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bird.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/boat.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/boat.xml new file mode 100644 index 00000000000..18977d1cc89 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/boat.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bottle.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bottle.xml new file mode 100644 index 00000000000..3e756d81859 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bottle.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bus.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bus.xml new file mode 100644 index 00000000000..9ac5697d1b0 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/bus.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/car.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/car.xml new file mode 100644 index 00000000000..30fa895d57b Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/car.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cat.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cat.xml new file mode 100644 index 00000000000..1ccdef8b4d5 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cat.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/chair.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/chair.xml new file mode 100644 index 00000000000..4b37f0d753c Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/chair.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cow.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cow.xml new file mode 100644 index 00000000000..d714029ab8f Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/cow.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/diningtable.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/diningtable.xml new file mode 100644 index 00000000000..e9d55387fe0 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/diningtable.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/dog.xml 
b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/dog.xml new file mode 100644 index 00000000000..6a4f958b558 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/dog.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/horse.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/horse.xml new file mode 100644 index 00000000000..e0e7ededc26 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/horse.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/motorbike.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/motorbike.xml new file mode 100644 index 00000000000..7fff6a4b37f Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/motorbike.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/person.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/person.xml new file mode 100644 index 00000000000..6da86a47504 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/person.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/pottedplant.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/pottedplant.xml new file mode 100644 index 00000000000..578b5621582 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/pottedplant.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sheep.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sheep.xml new file mode 100644 index 00000000000..41a1f764c6f Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sheep.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sofa.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sofa.xml new file mode 100644 index 00000000000..ee264585803 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/sofa.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/train.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/train.xml new file mode 100644 index 00000000000..008aefba0a1 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/train.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/tvmonitor.xml b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/tvmonitor.xml new file mode 100644 index 00000000000..19a829cc9b2 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/models_VOC2007_caskade/tvmonitor.xml differ diff --git a/modules/latentsvm/testdata/latentsvm/pcacoeff.bin b/modules/latentsvm/testdata/latentsvm/pcacoeff.bin new file mode 100644 index 00000000000..7ca9cf38df6 Binary files /dev/null and b/modules/latentsvm/testdata/latentsvm/pcacoeff.bin differ diff --git a/modules/latentsvm/testdata/latentsvm/results_caskade.xml b/modules/latentsvm/testdata/latentsvm/results_caskade.xml new file mode 100644 index 00000000000..97a9256141c --- /dev/null +++ b/modules/latentsvm/testdata/latentsvm/results_caskade.xml @@ -0,0 +1,12 @@ + + + + 0 0 340 485 -0.837739 0 + + 0 0 340 485 -0.837739 0 129 0 181 155 -0.795819 1 + +218 24 218 121 2.36436 1 0 285 233 129 1.93423 1 0 21 190 105 1.8496 1 202 183 202 +73 1.57262 1 0 171 171 68 1.49932 1 238 312 165 91 0.504801 1 0 181 226 90 
0.404986 +1 0 0 240 171 0.158534 1 207 155 233 129 -0.0988589 1 195 278 250 139 -0.50933 1 + 89 0 328 119 -0.570692 1 0 295 422 154 -0.922104 1 + diff --git a/modules/line_descriptor/README.md b/modules/line_descriptor/README.md new file mode 100644 index 00000000000..6028de03ccd --- /dev/null +++ b/modules/line_descriptor/README.md @@ -0,0 +1,2 @@ +Binary descriptors for lines extracted from an image +==================================================== \ No newline at end of file diff --git a/modules/optflow/README.md b/modules/optflow/README.md new file mode 100644 index 00000000000..e0a04374d60 --- /dev/null +++ b/modules/optflow/README.md @@ -0,0 +1,2 @@ +Optical Flow Algorithms for tracking points +=========================================== \ No newline at end of file diff --git a/modules/reg/README.md b/modules/reg/README.md index cf1d59e3ff1..678e6f8a056 100644 --- a/modules/reg/README.md +++ b/modules/reg/README.md @@ -1,4 +1,5 @@ -# OpenCV pixel-intensity based registration module +OpenCV pixel-intensity based registration module +================================================ Author and maintainer: Alfonso Sanchez-Beato alfonsosanchezbeato\_\_\_\_gmail.com diff --git a/modules/rgbd/README.md b/modules/rgbd/README.md new file mode 100644 index 00000000000..6c2440a1ad5 --- /dev/null +++ b/modules/rgbd/README.md @@ -0,0 +1,2 @@ + RGB-Depth Processing module +============================ \ No newline at end of file diff --git a/modules/rgbd/include/opencv2/rgbd.hpp b/modules/rgbd/include/opencv2/rgbd.hpp index 10f98cc4b19..62947ae21e4 100644 --- a/modules/rgbd/include/opencv2/rgbd.hpp +++ b/modules/rgbd/include/opencv2/rgbd.hpp @@ -233,7 +233,7 @@ namespace rgbd /** * @param depth the depth image - * @param K + * @param in_K * @param in_points the list of xy coordinates * @param points3d the resulting 3d points */ @@ -259,7 +259,7 @@ namespace rgbd * Otherwise, the image is simply converted to floats * @param in the depth image (if given as short int CV_U, it is assumed to be the depth in millimeters * (as done with the Microsoft Kinect), it is assumed in meters) - * @param the desired output depth (floats or double) + * @param depth the desired output depth (floats or double) * @param out The rescaled float depth image */ CV_EXPORTS @@ -290,10 +290,10 @@ namespace rgbd /** Find The planes in a depth image * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels - * @param the normals for every point in the depth image + * @param normals the normals for every point in the depth image * @param mask An image where each pixel is labeled with the plane it belongs to * and 255 if it does not belong to any plane - * @param the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1 + * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0, norm(a,b,c)=1 * and c < 0 (so that the normal points towards the camera) */ void @@ -304,7 +304,7 @@ namespace rgbd * @param points3d the 3d points organized like the depth image: rows x cols with 3 channels * @param mask An image where each pixel is labeled with the plane it belongs to * and 255 if it does not belong to any plane - * @param the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0 + * @param plane_coefficients the coefficients of the corresponding planes (a,b,c,d) such that ax+by+cz+d=0 */ void operator()(InputArray points3d, OutputArray mask, OutputArray plane_coefficients); @@ -457,11 +457,10 @@ 
namespace rgbd /** Prepare a cache for the frame. The function checks the precomputed/passed data (throws the error if this data * does not satisfy) and computes all remaining cache data needed for the frame. Returned size is a resolution * of the prepared frame. - * @param odometry The odometry which will process the frame. + * @param frame The odometry which will process the frame. * @param cacheType The cache type: CACHE_SRC, CACHE_DST or CACHE_ALL. */ - virtual Size - prepareFrameCache(Ptr& frame, int cacheType) const; + virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; protected: virtual void @@ -488,14 +487,15 @@ namespace rgbd * @param iterCounts Count of iterations on each pyramid level. * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out * if they have gradient magnitude less than minGradientMagnitudes[level]. + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param transformType Class of transformation */ RgbdOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), const std::vector& iterCounts = std::vector(), const std::vector& minGradientMagnitudes = std::vector(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), int transformType = RIGID_BODY_MOTION); - virtual Size - prepareFrameCache(Ptr& frame, int cacheType) const; + virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; AlgorithmInfo* info() const; @@ -536,15 +536,15 @@ namespace rgbd * @param maxDepth Pixels with depth larger than maxDepth will not be used * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out * if their depth difference is larger than maxDepthDiff - * @param pointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart * @param iterCounts Count of iterations on each pyramid level. + * @param transformType Class of trasformation */ ICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), const std::vector& iterCounts = std::vector(), int transformType = RIGID_BODY_MOTION); - virtual Size - prepareFrameCache(Ptr& frame, int cacheType) const; + virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; AlgorithmInfo* info() const; @@ -586,10 +586,11 @@ namespace rgbd * @param maxDepth Pixels with depth larger than maxDepth will not be used * @param maxDepthDiff Correspondences between pixels of two given frames will be filtered out * if their depth difference is larger than maxDepthDiff - * @param pointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart + * @param maxPointsPart The method uses a random pixels subset of size frameWidth x frameHeight x pointsPart * @param iterCounts Count of iterations on each pyramid level. * @param minGradientMagnitudes For each pyramid level the pixels will be filtered out * if they have gradient magnitude less than minGradientMagnitudes[level]. 
+ * @param transformType Class of trasformation */ RgbdICPOdometry(const Mat& cameraMatrix, float minDepth = DEFAULT_MIN_DEPTH(), float maxDepth = DEFAULT_MAX_DEPTH(), float maxDepthDiff = DEFAULT_MAX_DEPTH_DIFF(), float maxPointsPart = DEFAULT_MAX_POINTS_PART(), @@ -597,8 +598,7 @@ namespace rgbd const std::vector& minGradientMagnitudes = std::vector(), int transformType = RIGID_BODY_MOTION); - virtual Size - prepareFrameCache(Ptr& frame, int cacheType) const; + virtual Size prepareFrameCache(Ptr& frame, int cacheType) const; AlgorithmInfo* info() const; diff --git a/modules/saliency/README.md b/modules/saliency/README.md index 48ef3960344..ff8749e018b 100644 --- a/modules/saliency/README.md +++ b/modules/saliency/README.md @@ -1,4 +1,5 @@ -# Saliency Module +Saliency API, understanding where humans focus given a scene +============================================================ The purpose of this module is to create, group and make available to the users, different saliency algorithms, belonging to different categories. diff --git a/modules/surface_matching/README.md b/modules/surface_matching/README.md new file mode 100644 index 00000000000..6584d22f348 --- /dev/null +++ b/modules/surface_matching/README.md @@ -0,0 +1,2 @@ +Surface Matching Algorithm Through 3D Features +============================================== diff --git a/modules/surface_matching/include/opencv2/surface_matching/icp.hpp b/modules/surface_matching/include/opencv2/surface_matching/icp.hpp index 134de42f251..f0dc5b3032a 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/icp.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/icp.hpp @@ -101,14 +101,18 @@ class CV_EXPORTS ICP /** * \brief ICP constructor with default arguments. + * @param [in] iterations * @param [in] tolerence Controls the accuracy of registration at each iteration of ICP. - * @param [in] rejectionScale Robust outlier rejection is applied for robustness. This value actually corresponds to the standard deviation coefficient. Points with rejectionScale * \sigma are ignored during registration. - * @param [in] numLevels Number of pyramid levels to proceed. Deep pyramids increase speed but decrease accuracy. Too coarse pyramids might have computational overhead on top of the inaccurate registrtaion. This parameter should be chosen to optimize a balance. Typical values range from 4 to 10. - * @param [in] sampleType Currently this parameter is ignored and only uniform sampling is applied. Leave it as 0. + * @param [in] rejectionScale Robust outlier rejection is applied for robustness. This value + actually corresponds to the standard deviation coefficient. Points with + rejectionScale * &sigma are ignored during registration. + * @param [in] numLevels Number of pyramid levels to proceed. Deep pyramids increase speed but + decrease accuracy. Too coarse pyramids might have computational overhead on top of the + inaccurate registrtaion. This parameter should be chosen to optimize a balance. Typical + values range from 4 to 10. + * @param [in] sampleType Currently this parameter is ignored and only uniform sampling is + applied. Leave it as 0. * @param [in] numMaxCorr Currently this parameter is ignored and only PickyICP is applied. Leave it as 1. 
- * \return - * - * \details Constructor */ ICP(const int iterations, const float tolerence=0.05, const float rejectionScale=2.5, const int numLevels=6, const ICP_SAMPLING_TYPE sampleType = ICP_SAMPLING_TYPE_UNIFORM, const int numMaxCorr=1) { diff --git a/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp b/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp index 8c7fa76b9ed..82093701bd5 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/pose_3d.hpp @@ -96,13 +96,11 @@ class CV_EXPORTS Pose3D /** * \brief Updates the pose with the new one - * \param [in] NewPose New pose to overwrite */ void updatePose(double NewR[9], double NewT[3]); /** * \brief Updates the pose with the new one, but this time using quaternions to represent rotation - * \param [in] NewPose New pose to overwrite */ void updatePoseQuat(double Q[4], double NewT[3]); diff --git a/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp b/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp index 044fe4e299b..f05b7a7a02f 100644 --- a/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp +++ b/modules/surface_matching/include/opencv2/surface_matching/ppf_helpers.hpp @@ -49,18 +49,18 @@ namespace ppf_match_3d { /** - * \brief Load a PLY file - * - * \param [in] fileName The PLY model to read - * \param [in] withNormals Flag wheather the input PLY contains normal information, + * @brief Load a PLY file + * @param [in] fileName The PLY model to read + * @param [in] withNormals Flag wheather the input PLY contains normal information, * and whether it should be loaded or not - * \return Returns the matrix on successfull load + * @return Returns the matrix on successfull load */ CV_EXPORTS Mat loadPLYSimple(const char* fileName, int withNormals); /** - * \brief Write a point cloud to PLY file - * \param [in] fileName The PLY model file to write + * @brief Write a point cloud to PLY file + * @param [in] PC Input point cloud + * @param [in] fileName The PLY model file to write */ CV_EXPORTS void writePLY(Mat PC, const char* fileName); @@ -69,6 +69,7 @@ Mat samplePCUniformInd(Mat PC, int sampleStep, std::vector& indices); /** * Sample a point cloud using uniform steps + * @param [in] pc Input point cloud * @param [in] xrange X components (min and max) of the bounding box of the model * @param [in] yrange Y components (min and max) of the bounding box of the model * @param [in] zrange Z components (min and max) of the bounding box of the model @@ -77,7 +78,7 @@ Mat samplePCUniformInd(Mat PC, int sampleStep, std::vector& indices); * the parameter sample_step_relative. * @param [in] weightByCenter The contribution of the quantized data points can be weighted * by the distance to the origin. This parameter enables/disables the use of weighting. - * \return Sampled point cloud + * @return Sampled point cloud */ CV_EXPORTS Mat samplePCByQuantization(Mat pc, float xrange[2], float yrange[2], float zrange[2], float sample_step_relative, int weightByCenter=0); @@ -94,7 +95,7 @@ void queryPCFlann(void* flannIndex, Mat& pc, Mat& indices, Mat& distances); * @param [in] pc Input point cloud (CV_32F family). Point clouds with 3 or 6 elements per * row are expected. * @param [in] scale The scale after normalization. Default to 1. 
- * \return Normalized point cloud + * @return Normalized point cloud */ CV_EXPORTS Mat normalize_pc(Mat pc, float scale); @@ -107,7 +108,7 @@ Mat transPCCoeff(Mat pc, float scale, float Cx, float Cy, float Cz, float MinVal * row are expected. In the case where the normals are provided, they are also rotated to be * compatible with the entire transformation * @param [in] Pose 4x4 pose matrix, but linearized in row-major form. - * \return Transformed point cloud + * @return Transformed point cloud */ CV_EXPORTS Mat transformPCPose(Mat pc, double Pose[16]); @@ -120,25 +121,23 @@ CV_EXPORTS void getRandomPose(double Pose[16]); /** * Adds a uniform noise in the given scale to the input point cloud * @param [in] pc Input point cloud (CV_32F family). - * @param [in] scale Input scale of the noise. The larger the scale, the more - * noisy the output + * @param [in] scale Input scale of the noise. The larger the scale, the more noisy the output */ CV_EXPORTS Mat addNoisePC(Mat pc, double scale); /** - * \brief Compute the normals of an arbitrary point cloud - * - * @param [in] PC Input point cloud to compute the normals for. - * @param [in] PCNormals Output point cloud - * @param [in] NumNeighbors Number of neighbors to take into account in a local region - * @param [in] FlipViewpoint Should normals be flipped to a viewing direction? - * \return Returns 0 on success - * - * \details computeNormalsPC3d uses a plane fitting approach to smoothly compute + * @brief Compute the normals of an arbitrary point cloud + * computeNormalsPC3d uses a plane fitting approach to smoothly compute * local normals. Normals are obtained through the eigenvector of the covariance * matrix, corresponding to the smallest eigen value. * If PCNormals is provided to be an Nx6 matrix, then no new allocation * is made, instead the existing memory is overwritten. + * @param [in] PC Input point cloud to compute the normals for. + * @param [in] PCNormals Output point cloud + * @param [in] NumNeighbors Number of neighbors to take into account in a local region + * @param [in] FlipViewpoint Should normals be flipped to a viewing direction? + * @param [in] viewpoint + * @return Returns 0 on success */ CV_EXPORTS int computeNormalsPC3d(const Mat& PC, Mat& PCNormals, const int NumNeighbors, const bool FlipViewpoint, const double viewpoint[3]); } // namespace ppf_match_3d diff --git a/modules/text/README.md b/modules/text/README.md new file mode 100644 index 00000000000..91294f498c2 --- /dev/null +++ b/modules/text/README.md @@ -0,0 +1,3 @@ +Scene Text Detection and Recognition in Natural Scene Images +============================================================ + diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index d03ec836168..3064a6b88af 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -165,15 +165,14 @@ class CV_EXPORTS ERFilter : public Algorithm probability is above a global limit pmin and the difference between local maximum and local minimum is greater than minProbabilityDiff). - \param cb Callback with the classifier. 
- default classifier can be implicitly load with function loadClassifierNM1()
- from file in samples/cpp/trained_classifierNM1.xml
- \param thresholdDelta Threshold step in subsequent thresholds when extracting the component tree
- \param minArea The minimum area (% of image size) allowed for retreived ER's
- \param minArea The maximum area (% of image size) allowed for retreived ER's
- \param minProbability The minimum probability P(er|character) allowed for retreived ER's
- \param nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
- \param minProbability The minimum probability difference between local maxima and local minima ERs
+ @param cb – Callback with the classifier. The default classifier can be implicitly loaded with the function
+ loadClassifierNM1(), e.g. from the file in samples/cpp/trained_classifierNM1.xml
+ @param thresholdDelta – Threshold step in subsequent thresholds when extracting the component tree
+ @param minArea – The minimum area (% of image size) allowed for retrieved ERs
+ @param maxArea – The maximum area (% of image size) allowed for retrieved ERs
+ @param minProbability – The minimum probability P(er|character) allowed for retrieved ERs
+ @param nonMaxSuppression – Whether non-maximum suppression is done over the branch probabilities
+ @param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs
 */
CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb, int thresholdDelta = 1, float minArea = 0.00025,
@@ -260,11 +259,20 @@ enum { ERGROUPING_ORIENTATION_HORIZ,
 combine all these hypotheses to get the final estimate. Each of the resulting groups is finally
 validated using a classifier in order to assess if they form a valid horizontally-aligned text block.
- \param src Vector of sinle channel images CV_8UC1 from wich the regions were extracted.
- \param regions Vector of ER's retreived from the ERFilter algorithm from each channel
- \param filename The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
- \param minProbability The minimum probability for accepting a group
- \param groups The output of the algorithm are stored in this parameter as list of rectangles.
+ @param img – Original RGB or Grayscale image from which the regions were extracted.
+ @param channels – Vector of single channel images CV_8UC1 from which the regions were extracted.
+ @param regions – Vector of ERs retrieved from the ERFilter algorithm from each channel.
+ @param groups – The output of the algorithm is stored in this parameter as a set of lists of
+ indexes to the provided regions.
+ @param groups_rects – The output of the algorithm is stored in this parameter as a list of rectangles.
+ @param method – Grouping method (see the details below). Can be one of ERGROUPING_ORIENTATION_HORIZ,
+ ERGROUPING_ORIENTATION_ANY.
+ @param filename – The XML or YAML file with the classifier model (e.g.
+ samples/trained_classifier_erGrouping.xml). Only to use when grouping method is
+ ERGROUPING_ORIENTATION_ANY.
+ @param minProbability – The minimum probability for accepting a group. Only to use when grouping
+ method is ERGROUPING_ORIENTATION_ANY.
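+
+ A minimal usage sketch (illustrative only; it assumes the channels were produced beforehand, e.g.
+ with computeNMChannels(), that regions were filled per channel by the two-stage ERFilter, and that
+ the default values of the remaining parameters are acceptable):
+
+     std::vector<cv::Mat> channels;
+     std::vector<std::vector<cv::text::ERStat> > regions;  // one vector of ERs per channel
+     std::vector<std::vector<cv::Vec2i> > groups;          // lists of indexes into regions
+     std::vector<cv::Rect> groups_rects;                   // bounding box of each group
+     cv::text::erGrouping(img, channels, regions, groups, groups_rects,
+                          cv::text::ERGROUPING_ORIENTATION_HORIZ);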
+ */ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels, std::vector > ®ions, diff --git a/modules/text/src/erfilter.cpp b/modules/text/src/erfilter.cpp index 621e6323293..bc269f8cf76 100644 --- a/modules/text/src/erfilter.cpp +++ b/modules/text/src/erfilter.cpp @@ -3751,7 +3751,7 @@ void erGroupingNM(InputArray _img, InputArrayOfArrays _src, vector< vector surf = cv::xfeatures2d::SURF::create(); std::vector cpu_keypoints; cv::Mat cpu_descriptors; - TEST_CYCLE() surf(img, cv::noArray(), cpu_keypoints, cpu_descriptors); + TEST_CYCLE() surf->detect(img, cpu_keypoints); + TEST_CYCLE() surf->compute(img, cpu_keypoints, cpu_descriptors); SANITY_CHECK_KEYPOINTS(cpu_keypoints); SANITY_CHECK(cpu_descriptors); diff --git a/modules/xfeatures2d/test/test_surf.cuda.cpp b/modules/xfeatures2d/test/test_surf.cuda.cpp index 7f17e069b2f..f87f19d799c 100644 --- a/modules/xfeatures2d/test/test_surf.cuda.cpp +++ b/modules/xfeatures2d/test/test_surf.cuda.cpp @@ -94,15 +94,10 @@ CUDA_TEST_P(SURF, Detector) std::vector keypoints; surf(loadMat(image), cv::cuda::GpuMat(), keypoints); - cv::xfeatures2d::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::Ptr surf_gold = cv::xfeatures2d::SURF::create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright); std::vector keypoints_gold; - surf_gold(image, cv::noArray(), keypoints_gold); + surf_gold->detect(image, keypoints_gold); ASSERT_EQ(keypoints_gold.size(), keypoints.size()); int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); @@ -130,15 +125,10 @@ CUDA_TEST_P(SURF, Detector_Masked) std::vector keypoints; surf(loadMat(image), loadMat(mask), keypoints); - cv::xfeatures2d::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::Ptr surf_gold = cv::xfeatures2d::SURF::create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright); std::vector keypoints_gold; - surf_gold(image, mask, keypoints_gold); + surf_gold->detect(image, keypoints_gold, mask); ASSERT_EQ(keypoints_gold.size(), keypoints.size()); int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); @@ -160,21 +150,16 @@ CUDA_TEST_P(SURF, Descriptor) surf.upright = upright; surf.keypointsRatio = 0.05f; - cv::xfeatures2d::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::Ptr surf_gold = cv::xfeatures2d::SURF::create(hessianThreshold, nOctaves, nOctaveLayers, extended, upright); std::vector keypoints; - surf_gold(image, cv::noArray(), keypoints); + surf_gold->detect(image, keypoints); cv::cuda::GpuMat descriptors; surf(loadMat(image), cv::cuda::GpuMat(), keypoints, descriptors, true); cv::Mat descriptors_gold; - surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true); + surf_gold->compute(image, keypoints, descriptors_gold); cv::BFMatcher matcher(surf.defaultNorm()); std::vector matches; diff --git a/modules/ximgproc/README.md b/modules/ximgproc/README.md new file mode 100644 index 00000000000..a31a1461177 --- /dev/null +++ b/modules/ximgproc/README.md @@ -0,0 +1,9 @@ +Extended Image Processing +========================= + +1. Structured Forests +2. Domain Transform Filter +3. 
Guided Filter +4. Adaptive Manifold Filter +5. Joint Bilateral Filter +6. Superpixels diff --git a/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp b/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp index 0d5d18bc8a5..58fbde6e8c1 100644 --- a/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp +++ b/modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp @@ -64,8 +64,6 @@ class CV_EXPORTS_W DTFilter : public Algorithm CV_WRAP virtual void filter(InputArray src, OutputArray dst, int dDepth = -1) = 0; }; -typedef Ptr DTFilterPtr; - /*Fabric function for DT filters*/ CV_EXPORTS_W Ptr createDTFilter(InputArray guide, double sigmaSpatial, double sigmaColor, int mode = DTF_NC, int numIters = 3); @@ -101,7 +99,7 @@ class CV_EXPORTS_W AdaptiveManifoldFilter : public Algorithm * @brief Apply High-dimensional filtering using adaptive manifolds * @param src Input image to be filtered. * @param dst Adaptive-manifold filter response. - * @param src_joint Image for joint filtering (optional). + * @param joint Image for joint filtering (optional). */ CV_WRAP virtual void filter(InputArray src, OutputArray dst, InputArray joint = noArray()) = 0; diff --git a/modules/ximgproc/src/adaptive_manifold_filter_n.cpp b/modules/ximgproc/src/adaptive_manifold_filter_n.cpp index 9395632fedd..98ee98f3aff 100644 --- a/modules/ximgproc/src/adaptive_manifold_filter_n.cpp +++ b/modules/ximgproc/src/adaptive_manifold_filter_n.cpp @@ -40,6 +40,10 @@ #include #include +#ifdef _MSC_VER +# pragma warning(disable: 4512) +#endif + namespace { @@ -53,8 +57,6 @@ using namespace cv::ximgproc::intrinsics; #define SQR(x) ((x)*(x)) #endif -void computeEigenVector(const Mat1f& X, const Mat1b& mask, Mat1f& dst, int num_pca_iterations, const Mat1f& rand_vec); - inline double Log2(double n) { return log(n) / log(2.0); @@ -176,40 +178,28 @@ class AdaptiveManifoldFilterN : public AdaptiveManifoldFilter return Size( cvRound(srcSize.width * (1.0/df)), cvRound(srcSize.height*(1.0/df)) ) ; } - void downsample(InputArray src, OutputArray dst) + void downsample(const Mat& src, Mat& dst) { - if (src.isMatVector()) - { - vector& srcv = *static_cast< vector* >(src.getObj()); - vector& dstv = *static_cast< vector* >(dst.getObj()); - dstv.resize(srcv.size()); - for (int i = 0; i < (int)srcv.size(); i++) - downsample(srcv[i], dstv[i]); - } - else - { - double df = getResizeRatio(); - CV_DbgAssert(src.empty() || src.size() == srcSize); - resize(src, dst, Size(), 1.0 / df, 1.0 / df, INTER_LINEAR); - CV_DbgAssert(dst.size() == smallSize); - } + double df = getResizeRatio(); + CV_DbgAssert(src.empty() || src.size() == srcSize); + resize(src, dst, Size(), 1.0 / df, 1.0 / df, INTER_LINEAR); + CV_DbgAssert(dst.size() == smallSize); } - void upsample(InputArray src, OutputArray dst) + void upsample(const Mat& src, Mat& dst) { - if (src.isMatVector()) - { - vector& srcv = *static_cast< vector* >(src.getObj()); - vector& dstv = *static_cast< vector* >(dst.getObj()); - dstv.resize(srcv.size()); - for (int i = 0; i < (int)srcv.size(); i++) - upsample(srcv[i], dstv[i]); - } - else - { - CV_DbgAssert(src.empty() || src.size() == smallSize); - resize(src, dst, srcSize, 0, 0); - } + CV_DbgAssert(src.empty() || src.size() == smallSize); + resize(src, dst, srcSize, 0, 0); + } + + void downsample(const vector& srcv, vector& dstv) + { + mapParallel(&AdaptiveManifoldFilterN::downsample, srcv, dstv); + } + + void upsample(const vector& srcv, vector& dstv) + { + mapParallel(&AdaptiveManifoldFilterN::upsample, srcv, dstv); } private: @@ -236,6 
+226,39 @@ class AdaptiveManifoldFilterN : public AdaptiveManifoldFilter static void computeDTHor(vector& srcCn, Mat& dst, float ss, float sr); static void computeDTVer(vector& srcCn, Mat& dst, float ss, float sr); + + static void computeEigenVector(const vector& X, const Mat1b& mask, Mat1f& vecDst, int num_pca_iterations, const Mat1f& vecRand); + + static void computeOrientation(const vector& X, const Mat1f& vec, Mat1f& dst); + +private: /*Parallelization routines*/ + + typedef void (AdaptiveManifoldFilterN::*MapFunc)(const Mat& src, Mat& dst); + + void mapParallel(MapFunc func, const vector& srcv, vector& dstv) + { + dstv.resize(srcv.size()); + parallel_for_(Range(0, (int)srcv.size()), MapPrallelLoopBody(this, func, srcv, dstv)); + } + + struct MapPrallelLoopBody : public cv::ParallelLoopBody + { + MapPrallelLoopBody(AdaptiveManifoldFilterN *_instancePtr, MapFunc _transform, const vector& _srcv, vector& _dstv) + : instancePtr(_instancePtr), transform(_transform), srcv(_srcv), dstv(_dstv) + {} + + AdaptiveManifoldFilterN *instancePtr; + MapFunc transform; + const vector& srcv; + vector& dstv; + + void operator () (const Range& range) const + { + for (int i = range.start; i < range.end; i++) + (instancePtr->*transform)(srcv[i], dstv[i]); + } + }; + }; CV_INIT_ALGORITHM(AdaptiveManifoldFilterN, "AdaptiveManifoldFilter", @@ -660,36 +683,36 @@ void AdaptiveManifoldFilterN::RFFilterPass(vector& joint, vector& Psi_ void AdaptiveManifoldFilterN::computeClusters(Mat1b& cluster, Mat1b& cluster_minus, Mat1b& cluster_plus) { - Mat difEtaSrc; + + Mat1f difOreientation; + if (jointCnNum > 1) { - vector eta_difCn(jointCnNum); + Mat1f initVec(1, jointCnNum); + if (useRNG) + { + rnd.fill(initVec, RNG::UNIFORM, -0.5, 0.5); + } + else + { + for (int i = 0; i < (int)initVec.total(); i++) + initVec(0, i) = (i % 2 == 0) ? 0.5f : -0.5f; + } + + vector difEtaSrc(jointCnNum); for (int i = 0; i < jointCnNum; i++) - subtract(jointCn[i], etaFull[i], eta_difCn[i]); + subtract(jointCn[i], etaFull[i], difEtaSrc[i]); - merge(eta_difCn, difEtaSrc); - difEtaSrc = difEtaSrc.reshape(1, (int)difEtaSrc.total()); - CV_DbgAssert(difEtaSrc.cols == jointCnNum); - } + Mat1f eigenVec(1, jointCnNum); + computeEigenVector(difEtaSrc, cluster, eigenVec, num_pca_iterations_, initVec); - Mat1f initVec(1, jointCnNum); - if (useRNG) - { - rnd.fill(initVec, RNG::UNIFORM, -0.5, 0.5); + computeOrientation(difEtaSrc, eigenVec, difOreientation); + CV_DbgAssert(difOreientation.size() == srcSize); } else { - for (int i = 0; i < (int)initVec.total(); i++) - initVec(0, i) = (i % 2 == 0) ? 
0.5f : -0.5f; + subtract(jointCn[0], etaFull[0], difOreientation); } - Mat1f eigenVec(1, jointCnNum); - computeEigenVector(difEtaSrc, cluster, eigenVec, num_pca_iterations_, initVec); - - Mat1f difOreientation; - gemm(difEtaSrc, eigenVec, 1, noArray(), 0, difOreientation, GEMM_2_T); - difOreientation = difOreientation.reshape(1, srcSize.height); - CV_DbgAssert(difOreientation.size() == srcSize); - compare(difOreientation, 0, cluster_minus, CMP_LT); bitwise_and(cluster_minus, cluster, cluster_minus); @@ -721,59 +744,101 @@ void AdaptiveManifoldFilterN::computeEta(Mat& teta, Mat1b& cluster, vector& } } -void computeEigenVector(const Mat1f& X, const Mat1b& mask, Mat1f& dst, int num_pca_iterations, const Mat1f& rand_vec) +void AdaptiveManifoldFilterN::computeEigenVector(const vector& X, const Mat1b& mask, Mat1f& vecDst, int num_pca_iterations, const Mat1f& vecRand) { - CV_DbgAssert( X.cols == rand_vec.cols ); - CV_DbgAssert( X.rows == mask.size().area() ); - CV_DbgAssert( rand_vec.rows == 1 ); - - dst.create(rand_vec.size()); - rand_vec.copyTo(dst); - - Mat1f t(X.size()); + int cnNum = (int)X.size(); + int height = X[0].rows; + int width = X[0].cols; - float* dst_row = dst[0]; + vecDst.create(1, cnNum); + CV_Assert(vecRand.size() == Size(cnNum, 1) && vecDst.size() == Size(cnNum, 1)); + CV_Assert(mask.rows == height && mask.cols == width); + + const float *pVecRand = vecRand.ptr(); + Mat1d vecDstd(1, cnNum, 0.0); + double *pVecDst = vecDstd.ptr(); + Mat1f Xw(height, width); - for (int i = 0; i < num_pca_iterations; ++i) + for (int iter = 0; iter < num_pca_iterations; iter++) { - t.setTo(Scalar::all(0)); - - for (int y = 0, ind = 0; y < mask.rows; ++y) + for (int i = 0; i < height; i++) { - const uchar* mask_row = mask[y]; + const uchar *maskRow = mask.ptr(i); + float *mulRow = Xw.ptr(i); - for (int x = 0; x < mask.cols; ++x, ++ind) + //first multiplication + for (int cn = 0; cn < cnNum; cn++) { - if (mask_row[x]) + const float *srcRow = X[cn].ptr(i); + const float cnVal = pVecRand[cn]; + + if (cn == 0) + { + for (int j = 0; j < width; j++) + mulRow[j] = cnVal*srcRow[j]; + } + else { - const float* X_row = X[ind]; - float* t_row = t[ind]; + for (int j = 0; j < width; j++) + mulRow[j] += cnVal*srcRow[j]; + } + } - float dots = 0.0; - for (int c = 0; c < X.cols; ++c) - dots += dst_row[c] * X_row[c]; + for (int j = 0; j < width; j++) + if (!maskRow[j]) mulRow[j] = 0.0f; - for (int c = 0; c < X.cols; ++c) - t_row[c] = dots * X_row[c]; - } + //second multiplication + for (int cn = 0; cn < cnNum; cn++) + { + float curCnSum = 0.0f; + const float *srcRow = X[cn].ptr(i); + + for (int j = 0; j < width; j++) + curCnSum += mulRow[j]*srcRow[j]; + + //TODO: parallel reduce + pVecDst[cn] += curCnSum; } } + } + + divide(vecDstd, norm(vecDstd), vecDst); +} - dst.setTo(0.0); - for (int k = 0; k < X.rows; ++k) +void AdaptiveManifoldFilterN::computeOrientation(const vector& X, const Mat1f& vec, Mat1f& dst) +{ + int height = X[0].rows; + int width = X[0].cols; + int cnNum = (int)X.size(); + dst.create(height, width); + CV_DbgAssert(vec.rows == 1 && vec.cols == cnNum); + + const float *pVec = vec.ptr(); + + for (int i = 0; i < height; i++) + { + float *dstRow = dst.ptr(i); + + for (int cn = 0; cn < cnNum; cn++) { - const float* t_row = t[k]; + const float *srcRow = X[cn].ptr(i); + const float cnVal = pVec[cn]; - for (int c = 0; c < X.cols; ++c) + if (cn == 0) { - dst_row[c] += t_row[c]; + for (int j = 0; j < width; j++) + dstRow[j] = cnVal*srcRow[j]; + } + else + { + for (int j = 0; j < width; j++) + dstRow[j] += 
cnVal*srcRow[j]; } } } - - double n = norm(dst); - divide(dst, n, dst); } + + } diff --git a/modules/ximgproc/test/test_adaptive_manifold.cpp b/modules/ximgproc/test/test_adaptive_manifold.cpp index e5f32e60578..a8970041c37 100644 --- a/modules/ximgproc/test/test_adaptive_manifold.cpp +++ b/modules/ximgproc/test/test_adaptive_manifold.cpp @@ -54,19 +54,21 @@ static string getOpenCVExtraDir() return cvtest::TS::ptr()->get_data_path(); } -static void checkSimilarity(InputArray res, InputArray ref) +static void checkSimilarity(InputArray res, InputArray ref, double maxNormInf = 1, double maxNormL2 = 1.0 / 64) { double normInf = cvtest::norm(res, ref, NORM_INF); double normL2 = cvtest::norm(res, ref, NORM_L2) / res.total(); - EXPECT_LE(normInf, 1); - EXPECT_LE(normL2, 1.0 / 64); + if (maxNormInf >= 0) EXPECT_LE(normInf, maxNormInf); + if (maxNormL2 >= 0) EXPECT_LE(normL2, maxNormL2); } TEST(AdaptiveManifoldTest, SplatSurfaceAccuracy) { RNG rnd(0); + cv::setNumThreads(cv::getNumberOfCPUs()); + for (int i = 0; i < 10; i++) { Size sz(rnd.uniform(512, 1024), rnd.uniform(512, 1024)); @@ -126,6 +128,8 @@ TEST(AdaptiveManifoldTest, AuthorsReferenceAccuracy) Mat srcImg = imread(getOpenCVExtraDir() + srcImgPath); ASSERT_TRUE(!srcImg.empty()); + cv::setNumThreads(cv::getNumberOfCPUs()); + for (int i = 0; i < 3; i++) { Mat refRes = imread(getOpenCVExtraDir() + refPaths[i]); @@ -190,14 +194,19 @@ TEST_P(AdaptiveManifoldRefImplTest, RefImplAccuracy) double sigma_r = rnd.uniform(0.1, 0.9); bool adjust_outliers = (iter % 2 == 0); + cv::setNumThreads(cv::getNumberOfCPUs()); Mat res; amFilter(guide, src, res, sigma_s, sigma_r, adjust_outliers); + cv::setNumThreads(1); Mat resRef; Ptr amf = createAMFilterRefImpl(sigma_s, sigma_r, adjust_outliers); amf->filter(src, resRef, guide); - checkSimilarity(res, resRef); + //results of reference implementation may differ on small sigma_s into small isolated region + //due to low single-precision floating point numbers accuracy + //therefore the threshold of inf norm was increased + checkSimilarity(res, resRef, 25); } } diff --git a/modules/xobjdetect/README.md b/modules/xobjdetect/README.md new file mode 100644 index 00000000000..20b0fc69eee --- /dev/null +++ b/modules/xobjdetect/README.md @@ -0,0 +1,3 @@ +Integral Channel Features Detector Framework +============================================ + diff --git a/modules/xobjdetect/doc/integral_channel_features.rst b/modules/xobjdetect/doc/integral_channel_features.rst index 41865c17eda..f7db13169ea 100644 --- a/modules/xobjdetect/doc/integral_channel_features.rst +++ b/modules/xobjdetect/doc/integral_channel_features.rst @@ -165,9 +165,14 @@ Params for ICFDetector training. int model_n_rows; int model_n_cols; int bg_per_image; + std::string features_type; + float alpha; + bool is_grayscale; + bool use_fast_log; ICFDetectorParams(): feature_count(UINT_MAX), weak_count(100), - model_n_rows(56), model_n_cols(56), bg_per_image(5) + model_n_rows(56), model_n_cols(56), bg_per_image(5), + alpha(0.02), is_grayscale(false), use_fast_log(false) {} }; @@ -181,7 +186,7 @@ ICFDetector::train Train detector. -.. ocv:function:: void ICFDetector::train(const String& pos_path, const String& bg_path, ICFDetectorParams params = ICFDetectorParams()) +.. 
ocv:function:: void ICFDetector::train(const std::vector& pos_filenames, const std::vector& bg_filenames, ICFDetectorParams params = ICFDetectorParams()) :param pos_path: path to folder with images of objects (wildcards like ``/my/path/*.png`` are allowed) :param bg_path: path to folder with background images @@ -192,13 +197,20 @@ ICFDetector::detect Detect objects on image. -.. ocv:function:: void ICFDetector::detect(const Mat& image, vector& objects, float scaleFactor, Size minSize, Size maxSize, float threshold) +.. ocv:function:: void ICFDetector::detect(const Mat& image, vector& objects, float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector& values) + +.. ocv:function:: detect(const Mat& img, std::vector& objects, float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector& values) :param image: image for detection :param objects: output array of bounding boxes :param scaleFactor: scale between layers in detection pyramid :param minSize: min size of objects in pixels :param maxSize: max size of objects in pixels + :param minScaleFactor: min factor by which the image will be resized + :param maxScaleFactor: max factor by which the image will be resized + :param factorStep: scaling factor is incremented each pyramid layer according to this parameter + :param slidingStep: sliding window step + :param values: output vector with values of positive samples ICFDetector::write ------------------ diff --git a/modules/xobjdetect/include/opencv2/xobjdetect.hpp b/modules/xobjdetect/include/opencv2/xobjdetect.hpp index 6d86d7f7fd4..00b5135b59b 100644 --- a/modules/xobjdetect/include/opencv2/xobjdetect.hpp +++ b/modules/xobjdetect/include/opencv2/xobjdetect.hpp @@ -43,6 +43,7 @@ the use of this software, even if advised of the possibility of such damage. #define __OPENCV_XOBJDETECT_XOBJDETECT_HPP__ #include +#include #include #include @@ -102,6 +103,8 @@ std::vector > generateFeatures(Size window_size, const std::string& type, int count = INT_MAX, int channel_count = 10); +//sort in-place of columns of the input matrix +void sort_columns_without_copy(Mat& m, Mat indices = Mat()); struct CV_EXPORTS WaldBoostParams { @@ -127,8 +130,8 @@ class CV_EXPORTS WaldBoost : public Algorithm Returns feature indices chosen for cascade. 
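 Note: data is now taken by non-const reference; train() re-orders and crops its columns in place
 (see sort_columns_without_copy() in waldboost.cpp) instead of working on a copy, so the caller's
 matrix is modified. Passing use_fast_log = true makes the stumps use the fast_log() approximation,
 which is a bit less accurate but roughly 5x faster.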
Feature enumeration starts from 0 */ - virtual std::vector train(const Mat& /*data*/, - const Mat& /*labels*/) = 0; + virtual std::vector train(Mat& /*data*/, + const Mat& /*labels*/, bool use_fast_log=false) = 0; /* Predict object class given object that can compute object features @@ -157,9 +160,13 @@ struct CV_EXPORTS ICFDetectorParams int model_n_rows; int model_n_cols; int bg_per_image; + std::string features_type; + float alpha; + bool is_grayscale; + bool use_fast_log; ICFDetectorParams(): feature_count(UINT_MAX), weak_count(100), - model_n_rows(56), model_n_cols(56), bg_per_image(5) + model_n_rows(56), model_n_cols(56), bg_per_image(5), alpha(0.02f), is_grayscale(false), use_fast_log(false) {} }; @@ -167,18 +174,18 @@ class CV_EXPORTS ICFDetector { public: - ICFDetector(): waldboost_(), features_() {} + ICFDetector(): waldboost_(), features_(), ftype_() {} /* Train detector - pos_path — path to folder with images of objects + pos_filenames — paths to objects images - bg_path — path to folder with background images + bg_filenames — path backgrounds images params — parameters for detector training */ - void train(const String& pos_path, - const String& bg_path, + void train(const std::vector& pos_filenames, + const std::vector& bg_filenames, ICFDetectorParams params = ICFDetectorParams()); /* Detect object on image @@ -192,9 +199,35 @@ class CV_EXPORTS ICFDetector minSize — min size of objects in pixels maxSize — max size of objects in pixels + + slidingStep — sliding window step + + values — output vector with values of positive samples + */ + void detect(const Mat& image, std::vector& objects, - float scaleFactor, Size minSize, Size maxSize, float threshold); + float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector& values); + + /* Detect object on image + + image — image for detection + + object — output array of bounding boxes + + minScaleFactor — min factor image will be resized + + maxScaleFactor — max factor image will be resized + + factorStep — scaling factor is incremented according to factorStep + + slidingStep — sliding window step + + values — output vector with values of positive samples + + + */ + void detect(const Mat& img, std::vector& objects, float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector& values); /* Write detector to FileStorage */ void write(FileStorage &fs) const; @@ -207,6 +240,7 @@ class CV_EXPORTS ICFDetector std::vector > features_; int model_n_rows_; int model_n_cols_; + std::string ftype_; }; CV_EXPORTS void write(FileStorage& fs, String&, const ICFDetector& detector); diff --git a/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp b/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp index 3229e10f1f8..3873413db15 100644 --- a/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp +++ b/modules/xobjdetect/include/opencv2/xobjdetect/private.hpp @@ -33,10 +33,12 @@ class CV_EXPORTS Stump {-1, +1} weights — matrix of sample weights, size 1 x N + + visited_features: vector of already visited features (ignored in successive calls) Returns chosen feature index. 
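 Features whose indices are listed in visited_features are skipped during the scan, so a feature
 already chosen by a previous boosting round is never selected again (the caller zeroes the
 corresponding data row instead of erasing it). When use_fast_log is true, the leaf responses
 h_pos/h_neg are computed with the fast_log() approximation instead of log().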
Feature enumeration starts from 0 */ - int train(const Mat& data, const Mat& labels, const Mat& weights); + int train(const Mat& data, const Mat& labels, const Mat& weights, const std::vector& visited_features, bool use_fast_log = false); /* Predict object class given diff --git a/modules/xobjdetect/src/acffeature.cpp b/modules/xobjdetect/src/acffeature.cpp index fd3fd15da4b..39afc9bdfcb 100644 --- a/modules/xobjdetect/src/acffeature.cpp +++ b/modules/xobjdetect/src/acffeature.cpp @@ -120,18 +120,14 @@ void ICFFeatureEvaluatorImpl::setChannels(InputArrayOfArrays channels) channels_.clear(); vector ch; channels.getMatVector(ch); - CV_Assert(ch.size() == 10); for( size_t i = 0; i < ch.size(); ++i ) { const Mat &channel = ch[i]; Mat integral_channel; integral(channel, integral_channel, CV_32F); - Mat_ chan(integral_channel.rows, integral_channel.cols); - for( int row = 0; row < integral_channel.rows; ++row ) - for( int col = 0; col < integral_channel.cols; ++col ) - chan(row, col) = (int)integral_channel.at(row, col); - channels_.push_back(chan.clone()); + integral_channel.convertTo(integral_channel, CV_32S); + channels_.push_back(integral_channel.clone()); } } @@ -140,11 +136,13 @@ void ICFFeatureEvaluatorImpl::setPosition(Size position) position_ = position; } + int ICFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const { - CV_Assert(channels_.size() == 10); - CV_Assert(feature_ind < features_.size()); - + /* + + //following return is equal to this commented code, left here for readability. The new code runs much faster. + * const vector& feature = features_[feature_ind]; int x = feature[0] + position_.height; int y = feature[1] + position_.width; @@ -153,6 +151,14 @@ int ICFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const int n = feature[4]; const Mat_& ch = channels_[n]; return ch(y_to + 1, x_to + 1) - ch(y, x_to + 1) - ch(y_to + 1, x) + ch(y, x); + */ + + CV_Assert(feature_ind < features_.size()); + + return *(channels_[features_[feature_ind][4]].ptr()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][3] + position_.width+1))+ features_[feature_ind][2] + position_.height + 1)) - + *(channels_[features_[feature_ind][4]].ptr()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][1] + position_.width))+ features_[feature_ind][2] + position_.height + 1)) - + *(channels_[features_[feature_ind][4]].ptr()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][3] + position_.width+1))+ features_[feature_ind][0] + position_.height)) + + *(channels_[features_[feature_ind][4]].ptr()+((channels_[features_[feature_ind][4]].cols*(features_[feature_ind][1] + position_.width))+ features_[feature_ind][0] + position_.height)); } class ACFFeatureEvaluatorImpl : public FeatureEvaluatorImpl @@ -173,7 +179,6 @@ void ACFFeatureEvaluatorImpl::setChannels(InputArrayOfArrays channels) channels_.clear(); vector ch; channels.getMatVector(ch); - CV_Assert(ch.size() == 10); for( size_t i = 0; i < ch.size(); ++i ) { @@ -203,7 +208,6 @@ void ACFFeatureEvaluatorImpl::setPosition(Size position) int ACFFeatureEvaluatorImpl::evaluate(size_t feature_ind) const { - CV_Assert(channels_.size() == 10); CV_Assert(feature_ind < features_.size()); const vector& feature = features_[feature_ind]; @@ -271,25 +275,38 @@ vector > generateFeatures(Size window_size, const std::string& type, void computeChannels(InputArray image, vector& channels) { - Mat src(image.getMat().rows, image.getMat().cols, CV_32FC3); - image.getMat().convertTo(src, CV_32FC3, 1./255); - Mat_ grad; - Mat 
luv, gray; - cvtColor(src, gray, CV_RGB2GRAY); - cvtColor(src, luv, CV_RGB2Luv); + Mat_ angles; + Mat luv, gray, src; + + if(image.getMat().channels() > 1) + { + src = Mat(image.getMat().rows, image.getMat().cols, CV_32FC3); + image.getMat().convertTo(src, CV_32FC3, 1./255); + + cvtColor(src, gray, CV_RGB2GRAY); + cvtColor(src, luv, CV_RGB2Luv); + } + else + { + src = Mat(image.getMat().rows, image.getMat().cols, CV_32FC1); + image.getMat().convertTo(src, CV_32FC1, 1./255); + src.copyTo(gray); + } Mat_ row_der, col_der; Sobel(gray, row_der, CV_32F, 0, 1); Sobel(gray, col_der, CV_32F, 1, 0); - magnitude(row_der, col_der, grad); + cartToPolar(col_der, row_der, grad, angles, true); + //magnitude(row_der, col_der, grad); Mat_ hist = Mat_::zeros(grad.rows, grad.cols); - const float to_deg = 180 / 3.1415926f; + //const float to_deg = 180 / 3.1415926f; for (int row = 0; row < grad.rows; ++row) { for (int col = 0; col < grad.cols; ++col) { - float angle = atan2(row_der(row, col), col_der(row, col)) * to_deg; + //float angle = atan2(row_der(row, col), col_der(row, col)) * to_deg; + float angle = angles(row, col); if (angle < 0) angle += 180; int ind = (int)(angle / 30); @@ -304,10 +321,13 @@ void computeChannels(InputArray image, vector& channels) channels.clear(); - Mat luv_channels[3]; - split(luv, luv_channels); - for( int i = 0; i < 3; ++i ) - channels.push_back(luv_channels[i]); + if(image.getMat().channels() > 1) + { + Mat luv_channels[3]; + split(luv, luv_channels); + for( int i = 0; i < 3; ++i ) + channels.push_back(luv_channels[i]); + } channels.push_back(grad); diff --git a/modules/xobjdetect/src/icfdetector.cpp b/modules/xobjdetect/src/icfdetector.cpp index 068998d549b..aa782923364 100644 --- a/modules/xobjdetect/src/icfdetector.cpp +++ b/modules/xobjdetect/src/icfdetector.cpp @@ -58,34 +58,39 @@ using std::string; using std::min; using std::max; + namespace cv { + namespace xobjdetect { -void ICFDetector::train(const String& pos_path, - const String& bg_path, + +void ICFDetector::train(const vector& pos_filenames, + const vector& bg_filenames, ICFDetectorParams params) { - vector pos_filenames; - glob(pos_path, pos_filenames); - - vector bg_filenames; - glob(bg_path, bg_filenames); + + int color; + if(params.is_grayscale == false) + color = IMREAD_COLOR; + else + color = IMREAD_GRAYSCALE; model_n_rows_ = params.model_n_rows; model_n_cols_ = params.model_n_cols; + ftype_ = params.features_type; Size model_size(params.model_n_cols, params.model_n_rows); vector samples; /* positive samples + negative samples */ Mat sample, resized_sample; int pos_count = 0; - + for( size_t i = 0; i < pos_filenames.size(); ++i, ++pos_count ) { cout << setw(6) << (i + 1) << "/" << pos_filenames.size() << "\r"; - Mat img = imread(pos_filenames[i]); + Mat img = imread(pos_filenames[i], color); resize(img, resized_sample, model_size); samples.push_back(resized_sample.clone()); } @@ -96,18 +101,16 @@ void ICFDetector::train(const String& pos_path, for( size_t i = 0; i < bg_filenames.size(); ++i ) { cout << setw(6) << (i + 1) << "/" << bg_filenames.size() << "\r"; - Mat img = imread(bg_filenames[i]); + Mat img = imread(bg_filenames[i], color); for( int j = 0; j < params.bg_per_image; ++j, ++neg_count) { Rect r; - r.x = rng.uniform(0, img.cols); - r.width = rng.uniform(r.x + 1, img.cols); - r.y = rng.uniform(0, img.rows); - r.height = rng.uniform(r.y + 1, img.rows); - - sample = img.colRange(r.x, r.width).rowRange(r.y, r.height); - resize(sample, resized_sample, model_size); - 
samples.push_back(resized_sample.clone()); + r.x = rng.uniform(0, img.cols-model_size.width); + r.width = model_size.width; + r.y = rng.uniform(0, img.rows-model_size.height); + r.height = model_size.height; + sample = img.colRange(r.x, r.x + r.width).rowRange(r.y, r.y + r.height); + samples.push_back(sample.clone()); } } cout << "\n"; @@ -118,9 +121,15 @@ void ICFDetector::train(const String& pos_path, for( int i = pos_count; i < pos_count + neg_count; ++i ) labels(0, i) = -1; - vector > features = generateFeatures(model_size, "icf", - params.feature_count); - Ptr evaluator = createFeatureEvaluator(features, "icf"); + + vector > features; + if(params.is_grayscale == false) + features = generateFeatures(model_size, params.features_type, params.feature_count, 10); + else + features = generateFeatures(model_size, params.features_type, params.feature_count, 7); + + Ptr evaluator = createFeatureEvaluator(features, params.features_type); + Mat_ data = Mat_::zeros((int)features.size(), (int)samples.size()); Mat_ feature_col(1, (int)samples.size()); @@ -141,13 +150,13 @@ void ICFDetector::train(const String& pos_path, } cout << "\n"; samples.clear(); - + WaldBoostParams wparams; wparams.weak_count = params.weak_count; - wparams.alpha = 0.02f; + wparams.alpha = params.alpha; waldboost_ = createWaldBoost(wparams); - vector indices = waldboost_->train(data, labels); + vector indices = waldboost_->train(data, labels, params.use_fast_log); cout << "indices: "; for( size_t i = 0; i < indices.size(); ++i ) cout << indices[i] << " "; @@ -163,6 +172,7 @@ void ICFDetector::write(FileStorage& fs) const fs << "{"; fs << "model_n_rows" << model_n_rows_; fs << "model_n_cols" << model_n_cols_; + fs << "ftype" << String(ftype_.c_str()); fs << "waldboost"; waldboost_->write(fs); fs << "features" << "["; @@ -177,8 +187,11 @@ void ICFDetector::write(FileStorage& fs) const void ICFDetector::read(const FileNode& node) { waldboost_ = Ptr(createWaldBoost(WaldBoostParams())); + String f_temp; node["model_n_rows"] >> model_n_rows_; node["model_n_cols"] >> model_n_cols_; + f_temp = (String)node["ftype"]; + this->ftype_ = (string)f_temp.c_str(); waldboost_->read(node["waldboost"]); FileNode features = node["features"]; features_.clear(); @@ -191,49 +204,98 @@ void ICFDetector::read(const FileNode& node) } void ICFDetector::detect(const Mat& img, vector& objects, - float scaleFactor, Size minSize, Size maxSize, float threshold) + float scaleFactor, Size minSize, Size maxSize, float threshold, int slidingStep, std::vector& values) { + + float scale_from = min(model_n_cols_ / (float)maxSize.width, model_n_rows_ / (float)maxSize.height); float scale_to = max(model_n_cols_ / (float)minSize.width, model_n_rows_ / (float)minSize.height); objects.clear(); - Ptr evaluator = createFeatureEvaluator(features_, "icf"); + Ptr evaluator = createFeatureEvaluator(features_, ftype_); Mat rescaled_image; - int step = 8; vector channels; + for( float scale = scale_from; scale < scale_to + 0.001; scale *= scaleFactor ) { - cout << "scale " << scale << endl; int new_width = int(img.cols * scale); new_width -= new_width % 4; int new_height = int(img.rows * scale); new_height -= new_height % 4; - + resize(img, rescaled_image, Size(new_width, new_height)); computeChannels(rescaled_image, channels); evaluator->setChannels(channels); - for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += step) + for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += slidingStep) + { + for( int col = 0; col <= rescaled_image.cols - 
model_n_cols_; + col += slidingStep ) + { + evaluator->setPosition(Size(row, col)); + float value = waldboost_->predict(evaluator); + if( value > threshold ) + { + values.push_back(value); + int x = (int)(col / scale); + int y = (int)(row / scale); + int width = (int)(model_n_cols_ / scale); + int height = (int)(model_n_rows_ / scale); + objects.push_back(Rect(x, y, width, height)); + } + } + } + + } + +} + +void ICFDetector::detect(const Mat& img, vector& objects, + float minScaleFactor, float maxScaleFactor, float factorStep, float threshold, int slidingStep, std::vector& values) +{ + + if(factorStep <= 0) + { + CV_Error_(CV_StsBadArg, ("factorStep must be > 0")); + } + + objects.clear(); + Ptr evaluator = createFeatureEvaluator(features_, ftype_); + Mat rescaled_image; + vector channels; + + for( float scale = minScaleFactor; scale < maxScaleFactor + 0.001; scale += factorStep ) + { + if(scale < 1.0) + resize(img, rescaled_image, Size(),scale, scale, INTER_AREA); + else if (scale > 1.0) + resize(img, rescaled_image, Size(),scale, scale, INTER_CUBIC); + else //scale == 1.0 + img.copyTo(rescaled_image); + + computeChannels(rescaled_image, channels); + evaluator->setChannels(channels); + for( int row = 0; row <= rescaled_image.rows - model_n_rows_; row += slidingStep) { for( int col = 0; col <= rescaled_image.cols - model_n_cols_; - col += step ) + col += slidingStep ) { evaluator->setPosition(Size(row, col)); float value = waldboost_->predict(evaluator); if( value > threshold ) { + values.push_back(value); int x = (int)(col / scale); int y = (int)(row / scale); int width = (int)(model_n_cols_ / scale); int height = (int)(model_n_rows_ / scale); - cout << value << " " << x << " " << y << " " << width << " " - << height << endl; objects.push_back(Rect(x, y, width, height)); } } } } + } void write(FileStorage& fs, String&, const ICFDetector& detector) diff --git a/modules/xobjdetect/src/stump.cpp b/modules/xobjdetect/src/stump.cpp index c9c9911d815..221445a29d6 100644 --- a/modules/xobjdetect/src/stump.cpp +++ b/modules/xobjdetect/src/stump.cpp @@ -61,7 +61,27 @@ static void cumsum(const Mat_& src, Mat_ dst) } } -int Stump::train(const Mat& data, const Mat& labels, const Mat& weights) +//fast log implementation. 
A bit less accurate but ~5x faster +inline float fast_log2 (float val) +{ + int * const exp_ptr = reinterpret_cast (&val); + int x = *exp_ptr; + const int log_2 = ((x >> 23) & 255) - 128; + x &= ~(255 << 23); + x += 127 << 23; + *exp_ptr = x; + + val = ((-1.0f/3) * val + 2) * val - 2.0f/3; // (1) + + return (val + log_2); +} + +inline float fast_log (const float &val) +{ + return (fast_log2 (val) * 0.69314718f); +} + +int Stump::train(const Mat& data, const Mat& labels, const Mat& weights, const std::vector& visited_features, bool use_fast_log) { CV_Assert(labels.rows == 1 && labels.cols == data.cols); CV_Assert(weights.rows == 1 && weights.cols == data.cols); @@ -95,8 +115,11 @@ int Stump::train(const Mat& data, const Mat& labels, const Mat& weights) /* For every feature */ for( int row = 0; row < data.rows; ++row ) { - for( int col = 0; col < data.cols; ++col ) - d(0, col) = data.at(row, col); + if(std::find(visited_features.begin(), visited_features.end(), row) != visited_features.end()) { + //feature discarded + continue; + } + data.row(row).copyTo(d.row(0)); sortIdx(d, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); @@ -141,8 +164,16 @@ int Stump::train(const Mat& data, const Mat& labels, const Mat& weights) err = sqrt(pos_right * neg_wrong) + sqrt(pos_wrong * neg_right); - h_pos = .5f * log((pos_right + eps) / (pos_wrong + eps)); - h_neg = .5f * log((neg_wrong + eps) / (neg_right + eps)); + if(use_fast_log) + { + h_pos = .5f * fast_log((pos_right + eps) / (pos_wrong + eps)); + h_neg = .5f * fast_log((neg_wrong + eps) / (neg_right + eps)); + } + else + { + h_pos = .5f * log((pos_right + eps) / (pos_wrong + eps)); + h_neg = .5f * log((neg_wrong + eps) / (neg_right + eps)); + } if( err < min_err ) { diff --git a/modules/xobjdetect/src/waldboost.cpp b/modules/xobjdetect/src/waldboost.cpp index 340aefb2cf1..8866ab2607d 100644 --- a/modules/xobjdetect/src/waldboost.cpp +++ b/modules/xobjdetect/src/waldboost.cpp @@ -50,10 +50,93 @@ using std::cout; using std::endl; + + + + namespace cv { + namespace xobjdetect { + //sort in-place of columns of the input matrix + void sort_columns_without_copy(Mat& m, Mat indices) + { + + if(indices.data == 0) + sortIdx(m, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); + + Mat indices_of_indices; + sortIdx(indices, indices_of_indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); + + std::vector visited; + for(int c = 0; c(0,ind_v))).copyTo(column); + } + else + { + temp_column.copyTo(column); + } + + + if(indices_of_indices.at(0,next) != next) //value is in the right place + { + //store the next value to change + (m.col(indices_of_indices.at(0,next))).copyTo(temp_column); + //insert the value to change at the right place + column.copyTo(m.col(indices_of_indices.at(0,next))); + + //find the index of the next value to change + next = indices_of_indices.at(0,next); + //if the idenx is not visited yet + if(visited[next] == false) + { + //then mark it as visited, it will be computed in the next round + visited[next] = true; + } + else + { + //find first non visited index + int i = 0; + while(i<(int)visited.size() && visited[i] == true) + { + i++; + } + ind_v = i; + next = i; + temp_column = Mat(); + + } + } + else // value is already at the right place + { + visited[next] = true; + int i = 0; + while(i<(int)visited.size() && visited[i] == true) + { + i++; + } + next = i; + temp_column = Mat(); + ind_v = i; + } + + + } + + + } class WaldBoostImpl : public WaldBoost { @@ -63,8 +146,8 @@ class WaldBoostImpl : public WaldBoost params_(params) {} - virtual 
std::vector train(const Mat& data, - const Mat& labels); + virtual std::vector train(Mat& data, + const Mat& labels, bool use_fast_log=false); virtual float predict( const Ptr& feature_evaluator) const; @@ -138,13 +221,12 @@ void WaldBoostImpl::write(FileStorage& fs) const } -vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) +vector WaldBoostImpl::train(Mat& data, const Mat& labels_, bool use_fast_log) { - CV_Assert(labels_.rows == 1 && labels_.cols == data_.cols); - CV_Assert(data_.rows >= params_.weak_count); + CV_Assert(labels_.rows == 1 && labels_.cols == data.cols); + CV_Assert(data.rows >= params_.weak_count); - Mat labels, data; - data_.copyTo(data); + Mat labels; labels_.copyTo(labels); bool null_data = true; @@ -175,18 +257,18 @@ vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) feature_indices_pool.push_back(ind); vector feature_indices; + vector visited_features; Mat_ trace = Mat_::zeros(labels.rows, labels.cols); stumps_.clear(); thresholds_.clear(); for( int i = 0; i < params_.weak_count; ++i) - { - cout << "stage " << i << endl; + { Stump s; - int feature_ind = s.train(data, labels, weights); - cout << "feature_ind " << feature_ind << endl; + int feature_ind = s.train(data, labels, weights, visited_features, use_fast_log); stumps_.push_back(s); int ind = feature_indices_pool[feature_ind]; - feature_indices_pool.erase(feature_indices_pool.begin() + feature_ind); + //we don't need to erase the feature index anymore, because we ignore them if already visited + //feature_indices_pool.erase(feature_indices_pool.begin() + feature_ind); feature_indices.push_back(ind); // Recompute weights @@ -198,12 +280,13 @@ vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) weights.at(0, col) *= exp(-label * h); } - // Erase row for feature in data - Mat fixed_data; - fixed_data.push_back(data.rowRange(0, feature_ind)); - fixed_data.push_back(data.rowRange(feature_ind + 1, data.rows)); + // set to zero row for feature in data + for(int jc = 0; jc(feature_ind, jc) = 0; + } + visited_features.push_back(feature_ind); - data = fixed_data; // Normalize weights @@ -218,7 +301,6 @@ vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) sortIdx(trace, indices, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); Mat new_weights = Mat_::zeros(weights.rows, weights.cols); Mat new_labels = Mat_::zeros(labels.rows, labels.cols); - Mat new_data = Mat_::zeros(data.rows, data.cols); Mat new_trace; for( int col = 0; col < new_weights.cols; ++col ) { @@ -226,15 +308,12 @@ vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) weights.at(0, indices.at(0, col)); new_labels.at(0, col) = labels.at(0, indices.at(0, col)); - for( int row = 0; row < new_data.rows; ++row ) - { - new_data.at(row, col) = - data.at(row, indices.at(0, col)); - } } + + //sort in-place to save memory + sort_columns_without_copy(data, indices); sort(trace, new_trace, cv::SORT_EVERY_ROW | cv::SORT_ASCENDING); - // Compute threshold for trace /* int col = 0; @@ -262,19 +341,16 @@ vector WaldBoostImpl::train(const Mat& data_, const Mat& labels_) } thresholds_.push_back(new_trace.at(0, max_col)); - cout << "threshold " << *(thresholds_.end() - 1) << endl; - - cout << "col " << max_col << " size " << data.cols << endl; // Drop samples below threshold - new_data.colRange(max_col, new_data.cols).copyTo(data); + //uses Rois instead of copyTo to save memory + data = data(Rect(max_col, 0, data.cols - max_col, data.rows)); new_trace.colRange(max_col, new_trace.cols).copyTo(trace); 
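+        // weights and labels below are cropped at the same max_col, so their columns stay
+        // aligned with the in-place sorted (and cropped) data matrix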
new_weights.colRange(max_col, new_weights.cols).copyTo(weights); new_labels.colRange(max_col, new_labels.cols).copyTo(labels); pos_count = count(labels, +1); neg_count = count(labels, -1); - cout << "pos_count " << pos_count << "; neg_count " << neg_count << endl; if( data.cols < 2 || neg_count == 0) { @@ -293,6 +369,7 @@ float WaldBoostImpl::predict( { int value = feature_evaluator->evaluate(i); trace += stumps_[i].predict(value); + if( trace < thresholds_[i] ) return -1; } diff --git a/modules/xphoto/README.md b/modules/xphoto/README.md new file mode 100644 index 00000000000..fc4aa8857b9 --- /dev/null +++ b/modules/xphoto/README.md @@ -0,0 +1,7 @@ +Additional photo processing algorithms +====================================== + +1. Color balance +2. Denoising +3. Inpainting +
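
As a closing illustration of the extended xobjdetect interface documented above, here is a minimal, hypothetical sketch of driving the new scale-factor overload of `ICFDetector::detect`. The file names, the way the model node is located, and all numeric values are placeholder assumptions; a detector previously trained and serialized with `ICFDetector::train()`/`write()` is assumed to exist.

```
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/xobjdetect.hpp>

int main()
{
    // Load a previously trained detector; assumes it was stored as the first
    // top-level node of the XML file (placeholder file name).
    cv::FileStorage fs("detector_model.xml", cv::FileStorage::READ);
    cv::xobjdetect::ICFDetector detector;
    detector.read(fs.getFirstTopLevelNode());
    fs.release();

    cv::Mat img = cv::imread("input.png");

    std::vector<cv::Rect> objects; // detected bounding boxes
    std::vector<float> values;     // response value of each accepted window

    // Scale-factor overload: resize the image from 0.5x to 2.0x in steps of 0.25,
    // slide the model window every 8 pixels, keep windows scoring above 0.0.
    detector.detect(img, objects, 0.5f, 2.0f, 0.25f, 0.0f, 8, values);

    return 0;
}
```

The Size-based overload shown earlier in the diff is driven the same way, with the minimum and maximum window sizes given in pixels instead of scale factors.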