From bc303bcb9b9d20fb94bff2113eaf753ff8e3ed70 Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Thu, 9 Apr 2020 20:30:03 -0400 Subject: [PATCH 01/11] Adds model and inference for pose estimation. --- skel-track-notes.md | 10 ++ vino_core_lib/CMakeLists.txt | 2 + .../inferences/human_pose_estimation.h | 124 ++++++++++++++++++ .../models/human_pose_estimation_model.h | 67 ++++++++++ .../vino_core_lib/outputs/base_output.h | 7 + .../src/inferences/human_pose_estimation.cpp | 81 ++++++++++++ .../models/human_pose_estimation_model.cpp | 100 ++++++++++++++ 7 files changed, 391 insertions(+) create mode 100644 skel-track-notes.md create mode 100644 vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h create mode 100644 vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h create mode 100644 vino_core_lib/src/inferences/human_pose_estimation.cpp create mode 100644 vino_core_lib/src/models/human_pose_estimation_model.cpp diff --git a/skel-track-notes.md b/skel-track-notes.md new file mode 100644 index 00000000..f68e174d --- /dev/null +++ b/skel-track-notes.md @@ -0,0 +1,10 @@ +# Notes for Skeleton Tracking Package + +* Model URL: https://docs.openvinotoolkit.org/latest/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html + +## Installation + +```bash +$ cd /opt/openvino_toolkit/open_model_zoo/tools/downloader +$ python3 downloader.py --name human-pose-estimation-0001 +``` \ No newline at end of file diff --git a/vino_core_lib/CMakeLists.txt b/vino_core_lib/CMakeLists.txt index ebbdc780..ca56656a 100644 --- a/vino_core_lib/CMakeLists.txt +++ b/vino_core_lib/CMakeLists.txt @@ -147,6 +147,7 @@ add_library(${PROJECT_NAME} SHARED src/inferences/object_detection.cpp src/inferences/object_segmentation.cpp src/inferences/person_reidentification.cpp + src/inferences/human_pose_estimation.cpp src/inputs/realsense_camera.cpp src/inputs/realsense_camera_topic.cpp src/inputs/standard_camera.cpp @@ -160,6 +161,7 @@ 
add_library(${PROJECT_NAME} SHARED src/models/object_detection_model.cpp src/models/object_segmentation_model.cpp src/models/person_reidentification_model.cpp + src/models/human_pose_estimation_model.cpp src/outputs/image_window_output.cpp src/outputs/ros_topic_output.cpp src/outputs/rviz_output.cpp diff --git a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h new file mode 100644 index 00000000..0052c382 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h @@ -0,0 +1,124 @@ +// TODO add license + +/** + * @brief A header file with declaration for AgeGenderDetection Class + * @file human_pose_estimation.h + */ +#ifndef VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H +#define VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H + +#include +#include +#include + +#include "vino_core_lib/engines/engine.h" +#include "vino_core_lib/inferences/base_inference.h" +#include "vino_core_lib/models/human_pose_estimation_model.h" +#include "inference_engine.hpp" +#include "opencv2/opencv.hpp" + +namespace vino_core_lib +{ + +/** + * @class HumanPoseResult + * @brief Class for storing and processing age and gender detection result. + */ +class HumanPoseResult : public Result +{ + public: + explicit HumanPoseResult(const cv::Rect& location); + + // Following similar structure of vino_core_lib/inferences/object_detection.h + // and human_pose_estimation_demo/src/human_pose_estimator.h + + /** + * @brief Get the age keypoints of the estimated pose from the result. + * @return The estimated keypoints. + */ + std::vector getKeypoints() const + { + return keypoints_; + } + + /** + * @brief Get the score of the estimated pose from the result. + * @return The score of the estimation. 
+ */ + float getScore() const + { + return score_; + } + + std::vector keypoints_; + float score_ = -1; +}; + + +/** + * @class HumanPoseEstimation + * @brief Class to load the human pose estimation model and perform + human pose estimation. + */ +class HumanPoseEstimation : public BaseInference +{ + public: + using Result = vino_core_lib::HumanPoseResult; + HumanPoseEstimation(); + ~HumanPoseEstimation() override; + /** + * @brief Load the age gender detection model. + */ + void loadNetwork(std::shared_ptr); + /** + * @brief Enqueue a frame to this class. + * The frame will be buffered but not inferred yet. + * @param[in] frame The frame to be enqueued. + * @param[in] input_frame_loc The location of the enqueued frame with respect + * to the frame generated by the input device. + * @return Whether this operation is successful. + */ + bool enqueue(const cv::Mat& frame, const cv::Rect&) override; + /** + * @brief Start inference for all buffered frames. + * @return Whether this operation is successful. + */ + bool submitRequest() override; + /** + * @brief This function will fetch the results of the previous inference and + * stores the results in a result buffer array. All buffered frames will be + * cleared. + * @return Whether the Inference object fetches a result this time + */ + bool fetchResults() override; + /** + * @brief Get the length of the buffer result array. + * @return The length of the buffer result array. + */ + const int getResultsLength() const override; + /** + * @brief Get the location of result with respect + * to the frame generated by the input device. + * @param[in] idx The index of the result. + */ + const vino_core_lib::Result* getLocationResult(int idx) const override; + /** + * @brief Get the name of the Inference instance. + * @return The name of the Inference instance. + */ + const std::string getName() const override; + /** + * @brief Show the observed detection result either through image window + * or ROS topic. 
+ */ + const void observeOutput( + const std::shared_ptr& output) override; + + private: + std::shared_ptr valid_model_; + std::vector results_; +}; + +} // namespace vino_core_lib + +#endif // VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H \ No newline at end of file diff --git a/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h b/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h new file mode 100644 index 00000000..f96ddeb3 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h @@ -0,0 +1,67 @@ +// TODO: Add license + +/** + * @brief A header file with declaration for FaceDetectionModel Class + * @file human_pose_estimation_model.h + */ + +#ifndef VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H +#define VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H + +#include +#include "vino_core_lib/models/base_model.h" + +namespace Models +{ +/** + * @class HumanPoseEstimationModel + * @brief This class generates the human pose estimation model. + */ +class HumanPoseEstimationModel : public BaseModel +{ +public: + HumanPoseEstimationModel(const std::string&, int, int, int); + + /** + * @brief Get the input name. + * @return Input name. + */ + inline const std::string getInputName() const + { + return input_; + } + /** + * @brief Get the age from the detection reuslt. + * @return Detected age. + */ + inline const std::string getOutputKeypointsName() const + { + return output_keypoints_; + } + /** + * @brief Get the gender from the detection reuslt. + * @return Detected gender. + */ + inline const std::string getOutputHeatmapName() const + { + return output_heatmap_; + } + /** + * @brief Get the name of this detection model. + * @return Name of the model. 
+ */ + const std::string getModelName() const override; + +protected: + void checkLayerProperty(const InferenceEngine::CNNNetReader::Ptr&) override; + void setLayerProperty(InferenceEngine::CNNNetReader::Ptr) override; + + private: + std::string input_; + std::string output_keypoints_; + std::string output_heatmap_; +}; + +} // namespace Models + +#endif // VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H \ No newline at end of file diff --git a/vino_core_lib/include/vino_core_lib/outputs/base_output.h b/vino_core_lib/include/vino_core_lib/outputs/base_output.h index 5fa5bfce..4e85b678 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/base_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/base_output.h @@ -33,6 +33,7 @@ #include "vino_core_lib/inferences/object_detection.h" #include "vino_core_lib/inferences/object_segmentation.h" #include "vino_core_lib/inferences/person_reidentification.h" +#include "vino_core_lib/inferences/human_pose_estimation.h" #include "vino_core_lib/services/frame_processing_server.h" #include "opencv2/opencv.hpp" @@ -97,6 +98,12 @@ class BaseOutput virtual void accept(const std::vector &) { } + /** + * @brief Generate output content according to the human pose estimation result. + */ + virtual void accept(const std::vector &) + { + } /** * @brief Calculate the camera matrix of a frame for image window output, no implementation for ros topic output. 
diff --git a/vino_core_lib/src/inferences/human_pose_estimation.cpp b/vino_core_lib/src/inferences/human_pose_estimation.cpp new file mode 100644 index 00000000..b348d683 --- /dev/null +++ b/vino_core_lib/src/inferences/human_pose_estimation.cpp @@ -0,0 +1,81 @@ +// TODO add license + +/** + * @brief a header file with declaration of HumanPoseResult class //TODO update + * @file human_pose_estimation.cpp + */ + +#include +#include + +#include "vino_core_lib/inferences/human_pose_estimation.h" +#include "vino_core_lib/outputs/base_output.h" + +// HumanPoseResult +vino_core_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) + : Result(location) +{ +} + +// HumanPoseEstimation +vino_core_lib::HumanPoseEstimation::HumanPoseEstimation() + : vino_core_lib::BaseInference() +{ +} + +vino_core_lib::HumanPoseEstimation::~HumanPoseEstimation() = default; + +void vino_core_lib::HumanPoseEstimation::loadNetwork( + std::shared_ptr network) +{ + valid_model_ = network; + setMaxBatchSize(network->getMaxBatchSize()); +} + +bool vino_core_lib::HumanPoseEstimation::enqueue( + const cv::Mat& frame, const cv::Rect& input_frame_loc) +{ + if (getEnqueuedNum() == 0) + { + results_.clear(); + } + bool succeed = vino_core_lib::BaseInference::enqueue( + frame, input_frame_loc, 1, getResultsLength(), + valid_model_->getInputName()); + if (!succeed) return false; + Result r(input_frame_loc); + results_.emplace_back(r); + return true; +} + +bool vino_core_lib::HumanPoseEstimation::submitRequest() +{ + return vino_core_lib::BaseInference::submitRequest(); +} + +// TODO fetchResults() + +const int vino_core_lib::HumanPoseEstimation::getResultsLength() const +{ + return static_cast(results_.size()); +} + +const vino_core_lib::Result* +vino_core_lib::HumanPoseEstimation::getLocationResult(int idx) const +{ + return &(results_[idx]); +} + +const std::string vino_core_lib::HumanPoseEstimation::getName() const +{ + return valid_model_->getModelName(); +} + +const void 
vino_core_lib::HumanPoseEstimation::observeOutput( + const std::shared_ptr& output) +{ + if (output != nullptr) + { + output->accept(results_); + } +} diff --git a/vino_core_lib/src/models/human_pose_estimation_model.cpp b/vino_core_lib/src/models/human_pose_estimation_model.cpp new file mode 100644 index 00000000..bf1f8648 --- /dev/null +++ b/vino_core_lib/src/models/human_pose_estimation_model.cpp @@ -0,0 +1,100 @@ +// TODO add license + +// TODO Remove these comments +/* +Model URL: https://docs.openvinotoolkit.org/latest/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html + +# Inputs + +Shape: [1x3x256x456]. An input image in the [BxCxHxW] format , where: + B - batch size + C - number of channels + H - image height + W - image width. Expected color order is BGR. + +# Outputs + +The net outputs two blobs with the [1, 38, 32, 57] and [1, 19, 32, 57] shapes. +The first blob contains keypoint pairwise relations (part affinity fields), +while the second blob contains keypoint heatmaps. +*/ + +#include + +#include "vino_core_lib/models/human_pose_estimation_model.h" +#include "vino_core_lib/slog.h" + +Models::HumanPoseEstimationModel::HumanPoseEstimationModel( + const std::string& model_loc, + int input_num, int output_num, + int max_batch_size) + : BaseModel(model_loc, input_num, output_num, max_batch_size) +{ +} + +void Models::HumanPoseEstimationModel::setLayerProperty( + InferenceEngine::CNNNetReader::Ptr net_reader) +{ + // set input property + InferenceEngine::InputsDataMap input_info_map( + net_reader->getNetwork().getInputsInfo()); + InferenceEngine::InputInfo::Ptr input_info = input_info_map.begin()->second; + input_info->setPrecision(InferenceEngine::Precision::U8); // It's an image + input_info->setLayout(InferenceEngine::Layout::NCHW); // No idea. 
+ // set output property + InferenceEngine::OutputsDataMap output_info_map( + net_reader->getNetwork().getOutputsInfo()); + auto it = output_info_map.begin(); + InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; + InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + keypoints_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + keypoints_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + heatmap_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + heatmap_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + // set input and output layer name + input_ = input_info_map.begin()->first; + output_keypoints_ = keypoints_output_ptr->name; + output_heatmap_ = heatmap_output_ptr->name; +} + +void Models::HumanPoseEstimationModel::checkLayerProperty( + const InferenceEngine::CNNNetReader::Ptr& net_reader) +{ + slog::info << "Checking Age Gender Detection outputs" << slog::endl; + InferenceEngine::OutputsDataMap output_info( + net_reader->getNetwork().getOutputsInfo()); + auto it = output_info.begin(); + InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; + InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + // output layer of age should be Convolution type + // TODO check if the output types are correct. 
+// if (heatmap_output_ptr->getCreatorLayer().lock()->type == "Convolution") +// { +// std::swap(keypoints_output_ptr, heatmap_output_ptr); +// } +// if (keypoints_output_ptr->getCreatorLayer().lock()->type != "Convolution") +// { +// throw std::logic_error("In Age Gender network, age layer (" + +// keypoints_output_ptr->getCreatorLayer().lock()->name + +// ") should be a Convolution, but was: " + +// keypoints_output_ptr->getCreatorLayer().lock()->type); +// } +// if (heatmap_output_ptr->getCreatorLayer().lock()->type != "SoftMax") +// { +// throw std::logic_error("In Age Gender network, gender layer (" + +// heatmap_output_ptr->getCreatorLayer().lock()->name + +// ") should be a SoftMax, but was: " + +// heatmap_output_ptr->getCreatorLayer().lock()->type); +// } + slog::info << "Keypoints layer: " + << keypoints_output_ptr->getCreatorLayer().lock()->name + << slog::endl; + slog::info << "Heatmap layer: " + << heatmap_output_ptr->getCreatorLayer().lock()->name + << slog::endl; +} + +const std::string Models::HumanPoseEstimationModel::getModelName() const +{ + return "Human Pose Estimation"; +} \ No newline at end of file From b8bf41d4d0369533fb718dcf5d98f7e59597ba7d Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Mon, 13 Apr 2020 20:17:57 -0400 Subject: [PATCH 02/11] Implements the human pose detection. 
--- vino_core_lib/CMakeLists.txt | 1 + .../inferences/human_pose_estimation.h | 58 +++- .../include/vino_core_lib/inferences/peak.h | 81 +++++ .../include/vino_core_lib/pipeline_manager.h | 2 + .../include/vino_core_lib/pipeline_params.h | 1 + .../src/inferences/human_pose_estimation.cpp | 169 ++++++++- vino_core_lib/src/inferences/peak.cpp | 327 ++++++++++++++++++ vino_core_lib/src/pipeline_manager.cpp | 25 +- vino_launch/launch/pipeline_pose.launch | 19 + vino_launch/param/pipeline_pose.yaml | 44 +++ 10 files changed, 713 insertions(+), 14 deletions(-) create mode 100644 vino_core_lib/include/vino_core_lib/inferences/peak.h create mode 100644 vino_core_lib/src/inferences/peak.cpp create mode 100644 vino_launch/launch/pipeline_pose.launch create mode 100644 vino_launch/param/pipeline_pose.yaml diff --git a/vino_core_lib/CMakeLists.txt b/vino_core_lib/CMakeLists.txt index ca56656a..eac9c900 100644 --- a/vino_core_lib/CMakeLists.txt +++ b/vino_core_lib/CMakeLists.txt @@ -148,6 +148,7 @@ add_library(${PROJECT_NAME} SHARED src/inferences/object_segmentation.cpp src/inferences/person_reidentification.cpp src/inferences/human_pose_estimation.cpp + src/inferences/peak.cpp src/inputs/realsense_camera.cpp src/inputs/realsense_camera_topic.cpp src/inputs/standard_camera.cpp diff --git a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h index 0052c382..aedfe04e 100644 --- a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h +++ b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h @@ -28,6 +28,10 @@ class HumanPoseResult : public Result { public: explicit HumanPoseResult(const cv::Rect& location); + HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, // = std::vector(), + const float& score); // = 0); // Following similar structure of vino_core_lib/inferences/object_detection.h // and 
human_pose_estimation_demo/src/human_pose_estimator.h @@ -38,7 +42,7 @@ class HumanPoseResult : public Result */ std::vector getKeypoints() const { - return keypoints_; + return keypoints; } /** @@ -47,11 +51,11 @@ class HumanPoseResult : public Result */ float getScore() const { - return score_; + return score; } - std::vector keypoints_; - float score_ = -1; + std::vector keypoints; + float score = -1; }; @@ -115,8 +119,54 @@ class HumanPoseEstimation : public BaseInference const std::shared_ptr& output) override; private: + /** + * @brief + * + * Copied from: https://github.com/opencv/open_model_zoo/blob/master/demos/human_pose_estimation_demo/src/human_pose_estimator.cpp + * + * @param heatMapsData + * @param heatMapOffset + * @param nHeatMaps + * @param pafsData + * @param pafOffset + * @param nPafs + * @param featureMapWidth + * @param featureMapHeight + * @param imageSize + * @return std::vector + */ + std::vector postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const; + + void resizeFeatureMaps(std::vector& featureMaps) const; + + std::vector extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const; + + void correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const; + std::shared_ptr valid_model_; std::vector results_; + int width_ = 0; + int height_ = 0; + + // TODO add doc + int upsampleRatio_; // = 4; + float minPeaksDistance_; // = 3.0f; + const size_t keypointsNumber_ = 18; + float midPointsScoreThreshold_; // = 0.05f; + float foundMidPointsRatioThreshold_; // = 0.8f; + int minJointsNumber_; // = 3; + float minSubsetScore_; // = 0.2f; + int stride_; // = 8; + cv::Vec4i pad_; + cv::Size imageSize_; // = image.size(); }; } // namespace vino_core_lib diff --git 
a/vino_core_lib/include/vino_core_lib/inferences/peak.h b/vino_core_lib/include/vino_core_lib/inferences/peak.h new file mode 100644 index 00000000..20b1ac49 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/inferences/peak.h @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +// #include "human_pose.hpp" +#include "vino_core_lib/inferences/human_pose_estimation.h" + +namespace human_pose_estimation +{ + +using Result = vino_core_lib::HumanPoseResult; + +struct Peak { + Peak(const int id = -1, + const cv::Point2f& pos = cv::Point2f(), + const float score = 0.0f); + + int id; + cv::Point2f pos; + float score; +}; + +struct HumanPoseByPeaksIndices { + explicit HumanPoseByPeaksIndices(const int keypointsNumber); + + std::vector peaksIndices; + int nJoints; + float score; +}; + +struct TwoJointsConnection { + TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score); + + int firstJointIdx; + int secondJointIdx; + float score; +}; + +void findPeaks(const std::vector& heatMaps, + const float minPeaksDistance, + std::vector >& allPeaks, + int heatMapId); + +std::vector groupPeaksToPoses( + const std::vector >& allPeaks, + const std::vector& pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); + +class FindPeaksBody: public cv::ParallelLoopBody { +public: + FindPeaksBody(const std::vector& heatMaps, float minPeaksDistance, + std::vector >& peaksFromHeatMap) + : heatMaps(heatMaps), + minPeaksDistance(minPeaksDistance), + peaksFromHeatMap(peaksFromHeatMap) {} + + virtual void operator()(const cv::Range& range) const { + for (int i = range.start; i < range.end; i++) { + findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i); + } + } + +private: + const std::vector& heatMaps; + float minPeaksDistance; + 
std::vector >& peaksFromHeatMap; +}; + +} // namespace human_pose_estimation diff --git a/vino_core_lib/include/vino_core_lib/pipeline_manager.h b/vino_core_lib/include/vino_core_lib/pipeline_manager.h index 2ce98467..3a6d81db 100644 --- a/vino_core_lib/include/vino_core_lib/pipeline_manager.h +++ b/vino_core_lib/include/vino_core_lib/pipeline_manager.h @@ -104,6 +104,8 @@ class PipelineManager { const Params::ParamManager::InferenceParams& infer); std::shared_ptr createPersonReidentification( const Params::ParamManager::InferenceParams& infer); + std::shared_ptr createHumanPoseEstimation( + const Params::ParamManager::InferenceParams& infer); std::map pipelines_; std::map plugins_for_devices_; }; diff --git a/vino_core_lib/include/vino_core_lib/pipeline_params.h b/vino_core_lib/include/vino_core_lib/pipeline_params.h index 676e2e5e..2a934d42 100644 --- a/vino_core_lib/include/vino_core_lib/pipeline_params.h +++ b/vino_core_lib/include/vino_core_lib/pipeline_params.h @@ -55,6 +55,7 @@ const char kInferTpye_HeadPoseEstimation[] = "HeadPoseEstimation"; const char kInferTpye_ObjectDetection[] = "ObjectDetection"; const char kInferTpye_ObjectSegmentation[] = "ObjectSegmentation"; const char kInferTpye_PersonReidentification[] = "PersonReidentification"; +const char kInferTpye_HumanPoseEstimation[] = "HumanPoseEstimation"; /** * @class PipelineParams diff --git a/vino_core_lib/src/inferences/human_pose_estimation.cpp b/vino_core_lib/src/inferences/human_pose_estimation.cpp index b348d683..93cf967c 100644 --- a/vino_core_lib/src/inferences/human_pose_estimation.cpp +++ b/vino_core_lib/src/inferences/human_pose_estimation.cpp @@ -3,6 +3,9 @@ /** * @brief a header file with declaration of HumanPoseResult class //TODO update * @file human_pose_estimation.cpp + * + * This file was mostly based on age_gender_detection.cpp. It also uses the code + * from object_detection.cpp. 
*/ #include @@ -10,6 +13,7 @@ #include "vino_core_lib/inferences/human_pose_estimation.h" #include "vino_core_lib/outputs/base_output.h" +#include "vino_core_lib/inferences/peak.h" // HumanPoseResult vino_core_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) @@ -17,9 +21,27 @@ vino_core_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) { } +vino_core_lib::HumanPoseResult::HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, + const float& score) + : keypoints(keypoints), + score(score), + Result(location) +{ +} + // HumanPoseEstimation vino_core_lib::HumanPoseEstimation::HumanPoseEstimation() - : vino_core_lib::BaseInference() + : vino_core_lib::BaseInference(), + upsampleRatio_(4), + minPeaksDistance_(3.0f), + midPointsScoreThreshold_(0.05f), + foundMidPointsRatioThreshold_(0.8f), + minJointsNumber_(3), + minSubsetScore_(0.2f), + stride_(8), + pad_(cv::Vec4i::all(0)) { } @@ -35,15 +57,25 @@ void vino_core_lib::HumanPoseEstimation::loadNetwork( bool vino_core_lib::HumanPoseEstimation::enqueue( const cv::Mat& frame, const cv::Rect& input_frame_loc) { - if (getEnqueuedNum() == 0) + // object_detection.cpp + if (width_ == 0 && height_ == 0) // TODO do we need this? + { + width_ = frame.cols; + height_ = frame.rows; + } + + // TODO verify this. + imageSize_ = frame.size(); + + //TODO implement preprocessing? 
+ + if (!vino_core_lib::BaseInference::enqueue( + frame, input_frame_loc, 1, 0, valid_model_->getInputName())) { - results_.clear(); + return false; } - bool succeed = vino_core_lib::BaseInference::enqueue( - frame, input_frame_loc, 1, getResultsLength(), - valid_model_->getInputName()); - if (!succeed) return false; Result r(input_frame_loc); + results_.clear(); results_.emplace_back(r); return true; } @@ -53,7 +85,43 @@ bool vino_core_lib::HumanPoseEstimation::submitRequest() return vino_core_lib::BaseInference::submitRequest(); } -// TODO fetchResults() +bool vino_core_lib::HumanPoseEstimation::fetchResults() +{ + bool can_fetch = vino_core_lib::BaseInference::fetchResults(); + if (!can_fetch) return false; + auto request = getEngine()->getRequest(); + InferenceEngine::Blob::Ptr keypointsBlob = + request->GetBlob(valid_model_->getOutputKeypointsName()); + InferenceEngine::Blob::Ptr heatmapBlob = + request->GetBlob(valid_model_->getOutputHeatmapName()); + + //TODO implement postprocessing function. 
+ + // At this point, keypointsBlob has the [1,38,32,57] output + + results_.clear(); + CV_Assert(heatmapBlob->getTensorDesc().getDims()[1] == keypointsNumber_ + 1); + InferenceEngine::SizeVector heatMapDims = + heatmapBlob->getTensorDesc().getDims(); + //std::vector poses = postprocess( + results_ = postprocess( + heatmapBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsNumber_, + keypointsBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsBlob->getTensorDesc().getDims()[1], + heatMapDims[3], heatMapDims[2], imageSize_); + + slog::info << results_.size() << " people detected" << slog::endl; + + // for (size_t i = 0; i < results_.size(); ++i) + // { + // results_[i].age_ = ageBlob->buffer().as()[i] * 100; + // results_[i].male_prob_ = genderBlob->buffer().as()[i * 2 + 1]; + // } + return true; +} const int vino_core_lib::HumanPoseEstimation::getResultsLength() const { @@ -79,3 +147,88 @@ const void vino_core_lib::HumanPoseEstimation::observeOutput( output->accept(results_); } } + +using Result = vino_core_lib::HumanPoseResult; + +std::vector vino_core_lib::HumanPoseEstimation::postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const +{ + std::vector heatMaps(nHeatMaps); + for (size_t i = 0; i < heatMaps.size(); i++) { + heatMaps[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + heatMapsData + i * heatMapOffset))); + } + resizeFeatureMaps(heatMaps); + + std::vector pafs(nPafs); + for (size_t i = 0; i < pafs.size(); i++) { + pafs[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + pafsData + i * pafOffset))); + } + resizeFeatureMaps(pafs); + + std::vector poses = extractPoses(heatMaps, pafs); + correctCoordinates(poses, heatMaps[0].size(), imageSize); + return poses; +} + 
+std::vector vino_core_lib::HumanPoseEstimation::extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const { + std::vector > peaksFromHeatMap(heatMaps.size()); + human_pose_estimation::FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance_, peaksFromHeatMap); + cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), + findPeaksBody); + int peaksBefore = 0; + for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) { + peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); + for (auto& peak : peaksFromHeatMap[heatmapId]) { + peak.id += peaksBefore; + } + } + std::vector poses = groupPeaksToPoses( + peaksFromHeatMap, pafs, keypointsNumber_, midPointsScoreThreshold_, + foundMidPointsRatioThreshold_, minJointsNumber_, minSubsetScore_); + return poses; +} + +void vino_core_lib::HumanPoseEstimation::resizeFeatureMaps(std::vector& featureMaps) const { + for (auto& featureMap : featureMaps) { + cv::resize(featureMap, featureMap, cv::Size(), + upsampleRatio_, upsampleRatio_, cv::INTER_CUBIC); + } +} + +void vino_core_lib::HumanPoseEstimation::correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const { + CV_Assert(stride_ % upsampleRatio_ == 0); + + cv::Size fullFeatureMapSize = featureMapsSize * stride_ / upsampleRatio_; + + float scaleX = imageSize.width / + static_cast(fullFeatureMapSize.width - pad_(1) - pad_(3)); + float scaleY = imageSize.height / + static_cast(fullFeatureMapSize.height - pad_(0) - pad_(2)); + for (auto& pose : poses) { + for (auto& keypoint : pose.keypoints) { + if (keypoint != cv::Point2f(-1, -1)) { + keypoint.x *= stride_ / upsampleRatio_; + keypoint.x -= pad_(1); + keypoint.x *= scaleX; + + keypoint.y *= stride_ / upsampleRatio_; + keypoint.y -= pad_(0); + keypoint.y *= scaleY; + } + } + } +} + diff --git a/vino_core_lib/src/inferences/peak.cpp b/vino_core_lib/src/inferences/peak.cpp new file mode 100644 index 00000000..d1934be6 --- /dev/null +++ 
b/vino_core_lib/src/inferences/peak.cpp @@ -0,0 +1,327 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "vino_core_lib/inferences/peak.h" + +namespace human_pose_estimation { +Peak::Peak(const int id, const cv::Point2f& pos, const float score) + : id(id), + pos(pos), + score(score) {} + +HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber) + : peaksIndices(std::vector(keypointsNumber, -1)), + nJoints(0), + score(0.0f) {} + +TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score) + : firstJointIdx(firstJointIdx), + secondJointIdx(secondJointIdx), + score(score) {} + +void findPeaks(const std::vector& heatMaps, + const float minPeaksDistance, + std::vector >& allPeaks, + int heatMapId) { + const float threshold = 0.1f; + std::vector peaks; + const cv::Mat& heatMap = heatMaps[heatMapId]; + const float* heatMapData = heatMap.ptr(); + size_t heatMapStep = heatMap.step1(); + for (int y = -1; y < heatMap.rows + 1; y++) { + for (int x = -1; x < heatMap.cols + 1; x++) { + float val = 0; + if (x >= 0 + && y >= 0 + && x < heatMap.cols + && y < heatMap.rows) { + val = heatMapData[y * heatMapStep + x]; + val = val >= threshold ? val : 0; + } + + float left_val = 0; + if (y >= 0 + && x < (heatMap.cols - 1) + && y < heatMap.rows) { + left_val = heatMapData[y * heatMapStep + x + 1]; + left_val = left_val >= threshold ? left_val : 0; + } + + float right_val = 0; + if (x > 0 + && y >= 0 + && y < heatMap.rows) { + right_val = heatMapData[y * heatMapStep + x - 1]; + right_val = right_val >= threshold ? right_val : 0; + } + + float top_val = 0; + if (x >= 0 + && x < heatMap.cols + && y < (heatMap.rows - 1)) { + top_val = heatMapData[(y + 1) * heatMapStep + x]; + top_val = top_val >= threshold ? 
top_val : 0; + } + + float bottom_val = 0; + if (x >= 0 + && y > 0 + && x < heatMap.cols) { + bottom_val = heatMapData[(y - 1) * heatMapStep + x]; + bottom_val = bottom_val >= threshold ? bottom_val : 0; + } + + if ((val > left_val) + && (val > right_val) + && (val > top_val) + && (val > bottom_val)) { + peaks.push_back(cv::Point(x, y)); + } + } + } + std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) { + return a.x < b.x; + }); + std::vector isActualPeak(peaks.size(), true); + int peakCounter = 0; + std::vector& peaksWithScoreAndID = allPeaks[heatMapId]; + for (size_t i = 0; i < peaks.size(); i++) { + if (isActualPeak[i]) { + for (size_t j = i + 1; j < peaks.size(); j++) { + if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + + (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) { + isActualPeak[j] = false; + } + } + peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at(peaks[i]))); + } + } +} + +std::vector groupPeaksToPoses(const std::vector >& allPeaks, + const std::vector& pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore) { + const std::vector > limbIdsHeatmap = { + {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, + {2, 1}, {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18} + }; + const std::vector > limbIdsPaf = { + {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, {21, 22}, {23, 24}, {25, 26}, + {27, 28}, {29, 30}, {47, 48}, {49, 50}, {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46} + }; + + std::vector candidates; + for (const auto& peaks : allPeaks) { + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + } + std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t k = 0; k < limbIdsPaf.size(); k++) { + std::vector connections; + 
const int mapIdxOffset = keypointsNumber + 1; + std::pair scoreMid = { pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset] }; + const int idxJointA = limbIdsHeatmap[k].first - 1; + const int idxJointB = limbIdsHeatmap[k].second - 1; + const std::vector& candA = allPeaks[idxJointA]; + const std::vector& candB = allPeaks[idxJointB]; + const size_t nJointsA = candA.size(); + const size_t nJointsB = candB.size(); + if (nJointsA == 0 + && nJointsB == 0) { + continue; + } else if (nJointsA == 0) { + for (size_t i = 0; i < nJointsB; i++) { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointB] == candB[i].id) { + num++; + continue; + } + } + if (num == 0) { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointB] = candB[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candB[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } else if (nJointsB == 0) { + for (size_t i = 0; i < nJointsA; i++) { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointA] == candA[i].id) { + num++; + continue; + } + } + if (num == 0) { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointA] = candA[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candA[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + + std::vector tempJointConnections; + for (size_t i = 0; i < nJointsA; i++) { + for (size_t j = 0; j < nJointsB; j++) { + cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5; + cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y)); + cv::Point2f vec = candB[j].pos - candA[i].pos; + double norm_vec = cv::norm(vec); + if (norm_vec == 0) { + continue; + } + vec /= norm_vec; + float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); + int height_n = pafs[0].rows / 2; + float suc_ratio = 
0.0f; + float mid_score = 0.0f; + const int mid_num = 10; + const float scoreThreshold = -100.0f; + if (score > scoreThreshold) { + float p_sum = 0; + int p_count = 0; + cv::Size2f step((candB[j].pos.x - candA[i].pos.x)/(mid_num - 1), + (candB[j].pos.y - candA[i].pos.y)/(mid_num - 1)); + for (int n = 0; n < mid_num; n++) { + cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), + cvRound(candA[i].pos.y + n * step.height)); + cv::Point2f pred(scoreMid.first.at(midPoint), + scoreMid.second.at(midPoint)); + score = vec.x * pred.x + vec.y * pred.y; + if (score > midPointsScoreThreshold) { + p_sum += score; + p_count++; + } + } + suc_ratio = static_cast(p_count / mid_num); + float ratio = p_count > 0 ? p_sum / p_count : 0.0f; + mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); + } + if (mid_score > 0 + && suc_ratio > foundMidPointsRatioThreshold) { + tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); + } + } + } + if (!tempJointConnections.empty()) { + std::sort(tempJointConnections.begin(), tempJointConnections.end(), + [](const TwoJointsConnection& a, + const TwoJointsConnection& b) { + return (a.score > b.score); + }); + } + size_t num_limbs = std::min(nJointsA, nJointsB); + size_t cnt = 0; + std::vector occurA(nJointsA, 0); + std::vector occurB(nJointsB, 0); + for (size_t row = 0; row < tempJointConnections.size(); row++) { + if (cnt == num_limbs) { + break; + } + const int& indexA = tempJointConnections[row].firstJointIdx; + const int& indexB = tempJointConnections[row].secondJointIdx; + const float& score = tempJointConnections[row].score; + if (occurA[indexA] == 0 + && occurB[indexB] == 0) { + connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); + cnt++; + occurA[indexA] = 1; + occurB[indexB] = 1; + } + } + if (connections.empty()) { + continue; + } + + bool extraJointConnections = (k == 17 || k == 18); + if (k == 0) { + subset = std::vector( + connections.size(), 
HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t i = 0; i < connections.size(); i++) { + const int& indexA = connections[i].firstJointIdx; + const int& indexB = connections[i].secondJointIdx; + subset[i].peaksIndices[idxJointA] = indexA; + subset[i].peaksIndices[idxJointB] = indexB; + subset[i].nJoints = 2; + subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + } + } else if (extraJointConnections) { + for (size_t i = 0; i < connections.size(); i++) { + const int& indexA = connections[i].firstJointIdx; + const int& indexB = connections[i].secondJointIdx; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointA] == indexA + && subset[j].peaksIndices[idxJointB] == -1) { + subset[j].peaksIndices[idxJointB] = indexB; + } else if (subset[j].peaksIndices[idxJointB] == indexB + && subset[j].peaksIndices[idxJointA] == -1) { + subset[j].peaksIndices[idxJointA] = indexA; + } + } + } + continue; + } else { + for (size_t i = 0; i < connections.size(); i++) { + const int& indexA = connections[i].firstJointIdx; + const int& indexB = connections[i].secondJointIdx; + bool num = false; + for (size_t j = 0; j < subset.size(); j++) { + if (subset[j].peaksIndices[idxJointA] == indexA) { + subset[j].peaksIndices[idxJointB] = indexB; + subset[j].nJoints++; + subset[j].score += candidates[indexB].score + connections[i].score; + num = true; + } + } + if (!num) { + HumanPoseByPeaksIndices hpWithScore(keypointsNumber); + hpWithScore.peaksIndices[idxJointA] = indexA; + hpWithScore.peaksIndices[idxJointB] = indexB; + hpWithScore.nJoints = 2; + hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + subset.push_back(hpWithScore); + } + } + } + } + std::vector poses; + for (const auto& subsetI : subset) { + if (subsetI.nJoints < minJointsNumber + || subsetI.score / subsetI.nJoints < minSubsetScore) { + continue; + } + int position = -1; + Result pose(cv::Rect(), // TODO get 
the correct Rect + std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + subsetI.score * std::max(0, subsetI.nJoints - 1)); + for (const auto& peakIdx : subsetI.peaksIndices) { + position++; + if (peakIdx >= 0) { + pose.keypoints[position] = candidates[peakIdx].pos; + pose.keypoints[position].x += 0.5; + pose.keypoints[position].y += 0.5; + } + } + poses.push_back(pose); + } + return poses; +} +} // namespace human_pose_estimation diff --git a/vino_core_lib/src/pipeline_manager.cpp b/vino_core_lib/src/pipeline_manager.cpp index 497e863c..f72dd69f 100644 --- a/vino_core_lib/src/pipeline_manager.cpp +++ b/vino_core_lib/src/pipeline_manager.cpp @@ -192,14 +192,16 @@ PipelineManager::parseInference( } else if (infer.name == kInferTpye_ObjectDetection) { object = createObjectDetection(infer); - } else if (infer.name == kInferTpye_ObjectSegmentation) { object = createObjectSegmentation(infer); } else if (infer.name == kInferTpye_PersonReidentification) { object = createPersonReidentification(infer); - } + } + else if (infer.name == kInferTpye_HumanPoseEstimation) { + object = createHumanPoseEstimation(infer); + } else { slog::err << "Invalid inference name: " << infer.name << slog::endl; } @@ -325,6 +327,25 @@ PipelineManager::createPersonReidentification( return reidentification_inference_ptr; } +std::shared_ptr +PipelineManager::createHumanPoseEstimation( + const Params::ParamManager::InferenceParams & infer) +{ + // Following createObjectDetection and createFaceDetection + // 1 input, 2 outputs, 1 batch + auto human_estimation_model = + std::make_shared(infer.model, 1, 2, 1); + human_estimation_model->modelInit(); + auto pose_engine = std::make_shared( + plugins_for_devices_[infer.engine], human_estimation_model); + auto pose_inference_ptr = + std::make_shared(); //(infer.confidence_threshold); + pose_inference_ptr->loadNetwork(human_estimation_model); + pose_inference_ptr->loadEngine(pose_engine); + + return pose_inference_ptr; +} + void 
PipelineManager::threadPipeline(const char* name) { PipelineData& p = pipelines_[name]; while (p.state == PipelineState_ThreadRunning && p.pipeline != nullptr && ros::ok()) { diff --git a/vino_launch/launch/pipeline_pose.launch b/vino_launch/launch/pipeline_pose.launch new file mode 100644 index 00000000..6accc0e8 --- /dev/null +++ b/vino_launch/launch/pipeline_pose.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + diff --git a/vino_launch/param/pipeline_pose.yaml b/vino_launch/param/pipeline_pose.yaml new file mode 100644 index 00000000..0e09afb4 --- /dev/null +++ b/vino_launch/param/pipeline_pose.yaml @@ -0,0 +1,44 @@ +Pipelines: +- name: people + inputs: [RealSenseCamera] + infers: + - name: HumanPoseEstimation + model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml + engine: MYRIAD + label: to/be/set/xxx.labels + batch: 1 + # - name: FaceDetection + # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/face-detection-adas-0001/FP16/face-detection-adas-0001.xml + # engine: MYRIAD + # label: to/be/set/xxx.labels + # batch: 1 + # - name: AgeGenderRecognition + # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml + # engine: MYRIAD + # label: to/be/set/xxx.labels + # batch: 16 + # - name: EmotionRecognition + # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml + # engine: MYRIAD + # label: to/be/set/xxx.labels + # batch: 16 + # - name: HeadPoseEstimation + # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001.xml + # engine: MYRIAD + # label: to/be/set/xxx.labels + # batch: 16 + outputs: [ImageWindow, RosTopic] + confidence_threshold: 0.2 + connects: + - left: RealSenseCamera + right: [HumanPoseEstimation] + - left: 
HumanPoseEstimation + right: [ImageWindow, RosTopic] + # - left: AgeGenderRecognition + # right: [ImageWindow, RosTopic] + # - left: EmotionRecognition + # right: [ImageWindow, RosTopic] + # - left: HeadPoseEstimation + # right: [ImageWindow, RosTopic] + +OpenvinoCommon: From b1c2aac63245b5a7d5bf4c30203e7c9f96398bd7 Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Wed, 15 Apr 2020 15:01:44 -0400 Subject: [PATCH 03/11] Adds ROS topic output for human pose estimation. --- .../vino_core_lib/outputs/ros_topic_output.h | 11 +++++ vino_core_lib/src/inferences/peak.cpp | 2 +- .../src/outputs/ros_topic_output.cpp | 43 ++++++++++++++++++- vino_launch/launch/pipeline_pose.launch | 4 +- vino_people_msgs/CMakeLists.txt | 2 + vino_people_msgs/msg/HumanPose.msg | 17 ++++++++ vino_people_msgs/msg/HumanPoseStamped.msg | 16 +++++++ 7 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 vino_people_msgs/msg/HumanPose.msg create mode 100644 vino_people_msgs/msg/HumanPoseStamped.msg diff --git a/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h b/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h index 5b783d98..adfc563d 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -109,6 +111,13 @@ class RosTopicOutput : public BaseOutput */ void accept(const std::vector&) override; + /** + * @brief Generate ros topic information according to + * the human pose estimation result. + * @param[in] A human pose estimation result object.
+ */ + void accept(const std::vector&) override; + private: std_msgs::Header getHeader(); @@ -130,6 +139,8 @@ class RosTopicOutput : public BaseOutput std::shared_ptr person_reid_msg_ptr_; ros::Publisher pub_segmented_object_; std::shared_ptr segmented_object_msg_ptr_; + ros::Publisher pub_human_pose_; + std::shared_ptr human_pose_msg_ptr_; }; } // namespace Outputs diff --git a/vino_core_lib/src/inferences/peak.cpp b/vino_core_lib/src/inferences/peak.cpp index d1934be6..6904e921 100644 --- a/vino_core_lib/src/inferences/peak.cpp +++ b/vino_core_lib/src/inferences/peak.cpp @@ -309,7 +309,7 @@ std::vector groupPeaksToPoses(const std::vector >& all continue; } int position = -1; - Result pose(cv::Rect(), // TODO get the correct Rect + Result pose(cv::Rect(), // TODO get the correct Rect, which can be the max/min x y points std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), subsetI.score * std::max(0, subsetI.nJoints - 1)); for (const auto& peakIdx : subsetI.peaksIndices) { diff --git a/vino_core_lib/src/outputs/ros_topic_output.cpp b/vino_core_lib/src/outputs/ros_topic_output.cpp index cbad29fc..d4fc65dd 100644 --- a/vino_core_lib/src/outputs/ros_topic_output.cpp +++ b/vino_core_lib/src/outputs/ros_topic_output.cpp @@ -24,6 +24,7 @@ #include #include #include +#include Outputs::RosTopicOutput::RosTopicOutput() { @@ -41,6 +42,8 @@ Outputs::RosTopicOutput::RosTopicOutput() "/openvino_toolkit/reidentified_persons", 16); pub_segmented_object_ = nh_.advertise( "/openvino_toolkit/segmented_obejcts", 16); + pub_human_pose_ = nh_.advertise( + "/openvino_toolkit/human_poses", 16); emotions_msg_ptr_ = NULL; faces_msg_ptr_ = NULL; @@ -49,6 +52,7 @@ Outputs::RosTopicOutput::RosTopicOutput() object_msg_ptr_ = NULL; person_reid_msg_ptr_ = NULL; segmented_object_msg_ptr_ = NULL; + human_pose_msg_ptr_ = NULL; } void Outputs::RosTopicOutput::feedFrame(const cv::Mat& frame) {} @@ -201,6 +205,34 @@ void Outputs::RosTopicOutput::accept( } } +void Outputs::RosTopicOutput::accept( + 
const std::vector& results) +{ + human_pose_msg_ptr_ = std::make_shared(); + + vino_people_msgs::HumanPose hp; + std::vector points; + points.reserve(18); // TODO read this from the inference engine. + for (auto r : results) + { + auto loc = r.getLocation(); + hp.roi.x_offset = loc.x; + hp.roi.y_offset = loc.y; + hp.roi.width = loc.width; + hp.roi.height = loc.height; + hp.score = r.getScore(); + auto p = geometry_msgs::Point(); + for (auto kp : r.keypoints) + { + p.x = kp.x; + p.y = kp.y; + p.z = -1; + hp.keypoints.push_back(p); + } + human_pose_msg_ptr_->humanposes.push_back(hp); + } +} + void Outputs::RosTopicOutput::handleOutput() { std_msgs::Header header = getHeader(); @@ -264,7 +296,7 @@ void Outputs::RosTopicOutput::handleOutput() pub_object_.publish(object_msg); object_msg_ptr_ = nullptr; } - if (object_msg_ptr_ != nullptr) + if (object_msg_ptr_ != nullptr) // TODO: verify and remove duplicate code. { object_msgs::ObjectsInBoxes object_msg; object_msg.header = header; @@ -273,6 +305,15 @@ void Outputs::RosTopicOutput::handleOutput() pub_object_.publish(object_msg); object_msg_ptr_ = nullptr; } + if (human_pose_msg_ptr_ != nullptr) + { + vino_people_msgs::HumanPoseStamped human_pose_msg; + human_pose_msg.header = header; + human_pose_msg.humanposes.swap(human_pose_msg_ptr_->humanposes); + + pub_human_pose_.publish(human_pose_msg); + human_pose_msg_ptr_ = nullptr; + } } std_msgs::Header Outputs::RosTopicOutput::getHeader() diff --git a/vino_launch/launch/pipeline_pose.launch b/vino_launch/launch/pipeline_pose.launch index 6accc0e8..358878c2 100644 --- a/vino_launch/launch/pipeline_pose.launch +++ b/vino_launch/launch/pipeline_pose.launch @@ -5,12 +5,12 @@ - + + diff --git a/vino_launch/param/pipeline_pose.yaml b/vino_launch/param/pipeline_pose.yaml index 0e09afb4..1e0674ab 100644 --- a/vino_launch/param/pipeline_pose.yaml +++ b/vino_launch/param/pipeline_pose.yaml @@ -27,13 +27,13 @@ Pipelines: # engine: MYRIAD # label: to/be/set/xxx.labels # batch: 16 - 
outputs: [ImageWindow, RosTopic] + outputs: [ImageWindow, RosTopic, RViz] confidence_threshold: 0.2 connects: - left: RealSenseCamera right: [HumanPoseEstimation] - left: HumanPoseEstimation - right: [ImageWindow, RosTopic] + right: [ImageWindow, RosTopic, RViz] # - left: AgeGenderRecognition # right: [ImageWindow, RosTopic] # - left: EmotionRecognition From 8206e0d574d6251aebcbea1234d0bf59253f21d0 Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Thu, 16 Apr 2020 18:34:56 -0400 Subject: [PATCH 06/11] Fix keypoint replication bug. --- vino_core_lib/src/outputs/ros_topic_output.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vino_core_lib/src/outputs/ros_topic_output.cpp b/vino_core_lib/src/outputs/ros_topic_output.cpp index d4fc65dd..7e2ae2a5 100644 --- a/vino_core_lib/src/outputs/ros_topic_output.cpp +++ b/vino_core_lib/src/outputs/ros_topic_output.cpp @@ -210,23 +210,23 @@ void Outputs::RosTopicOutput::accept( { human_pose_msg_ptr_ = std::make_shared(); - vino_people_msgs::HumanPose hp; - std::vector points; - points.reserve(18); // TODO read this from the inference engine. 
for (auto r : results) { + vino_people_msgs::HumanPose hp; + auto loc = r.getLocation(); hp.roi.x_offset = loc.x; hp.roi.y_offset = loc.y; hp.roi.width = loc.width; hp.roi.height = loc.height; hp.score = r.getScore(); + auto p = geometry_msgs::Point(); + p.z = -1; for (auto kp : r.keypoints) { p.x = kp.x; p.y = kp.y; - p.z = -1; hp.keypoints.push_back(p); } human_pose_msg_ptr_->humanposes.push_back(hp); From 7f212a8a32d524924cea3c920c2eeb16ba58811f Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Thu, 16 Apr 2020 18:41:38 -0400 Subject: [PATCH 07/11] Adds ROS service support for human pose estimation --- .../vino_core_lib/outputs/base_output.h | 2 + .../outputs/ros_service_output.h | 5 +- .../services/frame_processing_server.h | 1 + .../src/outputs/ros_service_output.cpp | 11 +++ .../src/services/frame_processing_server.cpp | 2 + .../launch/image_human_pose_server.launch | 8 ++ .../param/image_human_pose_server.yaml | 19 ++++ vino_people_msgs/CMakeLists.txt | 1 + vino_people_msgs/srv/HumanPoseSrv.srv | 17 ++++ vino_sample/CMakeLists.txt | 37 +++++++ vino_sample/src/image_human_pose_client.cpp | 69 +++++++++++++ vino_sample/src/image_human_pose_server.cpp | 98 +++++++++++++++++++ 12 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 vino_launch/launch/image_human_pose_server.launch create mode 100644 vino_launch/param/image_human_pose_server.yaml create mode 100644 vino_people_msgs/srv/HumanPoseSrv.srv create mode 100644 vino_sample/src/image_human_pose_client.cpp create mode 100644 vino_sample/src/image_human_pose_server.cpp diff --git a/vino_core_lib/include/vino_core_lib/outputs/base_output.h b/vino_core_lib/include/vino_core_lib/outputs/base_output.h index 4e85b678..0f0a0401 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/base_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/base_output.h @@ -133,6 +133,8 @@ class BaseOutput boost::shared_ptr response) {} virtual void setServiceResponse( boost::shared_ptr response) {} + virtual 
void setServiceResponse( + boost::shared_ptr response) {} Pipeline* getPipeline() const; cv::Mat getFrame() const; virtual void clearData() {} diff --git a/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h b/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h index 373ace30..9615c7c3 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -41,7 +43,7 @@ #include #include #include - +#include #include @@ -78,6 +80,7 @@ class RosServiceOutput : public RosTopicOutput void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); + void setServiceResponse(boost::shared_ptr response); private: const std::string service_name_; diff --git a/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h b/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h index b69d54c6..7d1cf431 100644 --- a/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h +++ b/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/vino_core_lib/src/outputs/ros_service_output.cpp b/vino_core_lib/src/outputs/ros_service_output.cpp index 4014c46e..d878434b 100644 --- a/vino_core_lib/src/outputs/ros_service_output.cpp +++ b/vino_core_lib/src/outputs/ros_service_output.cpp @@ -91,6 +91,16 @@ void Outputs::RosServiceOutput::setServiceResponse( } } +void Outputs::RosServiceOutput::setServiceResponse( + boost::shared_ptr response) +{ + slog::info << "in Human Pose Estimation service::Response ..."; + if (human_pose_msg_ptr_ != nullptr) { + response->humanposes = human_pose_msg_ptr_->humanposes; + } + slog::info << "DONE!" 
<< slog::endl; +} + void Outputs::RosServiceOutput::setServiceResponse( boost::shared_ptr response) { @@ -126,4 +136,5 @@ void Outputs::RosServiceOutput::clearData() headpose_msg_ptr_ = nullptr; segmented_object_msg_ptr_ = nullptr; person_reid_msg_ptr_ = nullptr; + human_pose_msg_ptr_ = nullptr; } diff --git a/vino_core_lib/src/services/frame_processing_server.cpp b/vino_core_lib/src/services/frame_processing_server.cpp index 2f9675f4..ad5d3ba4 100644 --- a/vino_core_lib/src/services/frame_processing_server.cpp +++ b/vino_core_lib/src/services/frame_processing_server.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -99,4 +100,5 @@ template class FrameProcessingServer; template class FrameProcessingServer; template class FrameProcessingServer; template class FrameProcessingServer; +template class FrameProcessingServer; } // namespace vino_service diff --git a/vino_launch/launch/image_human_pose_server.launch b/vino_launch/launch/image_human_pose_server.launch new file mode 100644 index 00000000..16c191b1 --- /dev/null +++ b/vino_launch/launch/image_human_pose_server.launch @@ -0,0 +1,8 @@ + + + + + + + diff --git a/vino_launch/param/image_human_pose_server.yaml b/vino_launch/param/image_human_pose_server.yaml new file mode 100644 index 00000000..6ae92aeb --- /dev/null +++ b/vino_launch/param/image_human_pose_server.yaml @@ -0,0 +1,19 @@ +Pipelines: +- name: people + inputs: [Image] + infers: + - name: HumanPoseEstimation + model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml + engine: MYRIAD + label: to/be/set/xxx.labels + batch: 1 + outputs: [RosService] + confidence_threshold: 0.2 + connects: + - left: Image + right: [HumanPoseEstimation] + - left: HumanPoseEstimation + right: [RosService] + input_path: "/opt/openvino_toolkit/ros_openvino_toolkit/data/images/team.jpg" + +OpenvinoCommon: diff --git a/vino_people_msgs/CMakeLists.txt 
b/vino_people_msgs/CMakeLists.txt index d79c0738..cf577f92 100644 --- a/vino_people_msgs/CMakeLists.txt +++ b/vino_people_msgs/CMakeLists.txt @@ -47,6 +47,7 @@ add_service_files(FILES PeopleSrv.srv ReidentificationSrv.srv ObjectsInMasksSrv.srv + HumanPoseSrv.srv ) generate_messages(DEPENDENCIES std_msgs diff --git a/vino_people_msgs/srv/HumanPoseSrv.srv b/vino_people_msgs/srv/HumanPoseSrv.srv new file mode 100644 index 00000000..2e468088 --- /dev/null +++ b/vino_people_msgs/srv/HumanPoseSrv.srv @@ -0,0 +1,17 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +string image_path # input: an image +--- +HumanPose[] humanposes # HumanPose array diff --git a/vino_sample/CMakeLists.txt b/vino_sample/CMakeLists.txt index f2cbe305..259a0ebe 100644 --- a/vino_sample/CMakeLists.txt +++ b/vino_sample/CMakeLists.txt @@ -272,6 +272,43 @@ target_link_libraries(image_reidentification_server ${OpenCV_LIBRARIES} ) +add_executable(image_human_pose_client + src/image_human_pose_client.cpp +) + +add_dependencies(image_human_pose_client + ${${PROJECT_NAME}_EXPORTED_TARGETS} + ${catkin_EXPORTED_TARGETS} + ${vino_core_lib_TARGETS} +) + +target_link_libraries(image_human_pose_client + ${catkin_LIBRARIES} + cpu_extension + gflags + ${vino_param_lib_LIBRARIES} + ${InferenceEngine_LIBRARIES} + ${OpenCV_LIBRARIES} +) + +add_executable(image_human_pose_server + src/image_human_pose_server.cpp +) + +add_dependencies(image_human_pose_server + ${${PROJECT_NAME}_EXPORTED_TARGETS} + ${catkin_EXPORTED_TARGETS} + ${vino_core_lib_TARGETS} +) + +target_link_libraries(image_human_pose_server + ${catkin_LIBRARIES} + cpu_extension + gflags + ${vino_param_lib_LIBRARIES} + ${InferenceEngine_LIBRARIES} + ${OpenCV_LIBRARIES} +) if(UNIX OR APPLE) # Linker flags. diff --git a/vino_sample/src/image_human_pose_client.cpp b/vino_sample/src/image_human_pose_client.cpp new file mode 100644 index 00000000..42c136cc --- /dev/null +++ b/vino_sample/src/image_human_pose_client.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +* \brief A sample for this library. This sample performs human pose estimation. +* \file src/image_human_pose_client.cpp +*/ + +#include +#include +#include "opencv2/opencv.hpp" + +#include + + + +int main(int argc, char ** argv) +{ + ros::init(argc, argv, "image_human_pose_client"); + + ros::NodeHandle n; + + ros::ServiceClient client = n.serviceClient("/openvino_toolkit/service"); + + + vino_people_msgs::HumanPoseSrv srv; + + if (client.call(srv)) + { + ROS_INFO("Request service success!"); + + for (unsigned int i = 0; i < srv.response.humanposes.size(); i++) { + std::ostringstream os; + os << "Pose " << i << ", keypoints (" << srv.response.humanposes[i].keypoints.size() << "):"; + + for (auto kp : srv.response.humanposes[i].keypoints) + { + os << std::endl<< "\tx: " << kp.x << "\ty: " << kp.y << "\tz: " << kp.z; + } + ROS_INFO(os.str().c_str()); + } + + + } + else + { + ROS_ERROR("Failed to request service \"openvino_toolkit/service\" "); return -1; + } + + + + +} + + + diff --git a/vino_sample/src/image_human_pose_server.cpp b/vino_sample/src/image_human_pose_server.cpp new file mode 100644 index 00000000..f5f9d764 --- /dev/null +++ b/vino_sample/src/image_human_pose_server.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +* \brief A sample for this library. 
This sample performs human pose estimation. +* \file src/image_human_pose_server.cpp +*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "vino_core_lib/common.h" +#include "vino_core_lib/engines/engine.h" +#include "vino_core_lib/factory.h" +#include "vino_core_lib/inferences/age_gender_detection.h" +#include "vino_core_lib/inferences/base_inference.h" +#include "vino_core_lib/inferences/emotions_detection.h" +#include "vino_core_lib/inferences/face_detection.h" +#include "vino_core_lib/inferences/head_pose_detection.h" +#include "vino_core_lib/inferences/human_pose_estimation.h" +#include "vino_core_lib/inputs/realsense_camera_topic.h" +#include "vino_core_lib/outputs/image_window_output.h" +#include "vino_core_lib/outputs/ros_topic_output.h" +#include "vino_core_lib/outputs/rviz_output.h" +#include "vino_core_lib/pipeline.h" +#include "vino_core_lib/pipeline_manager.h" +#include "vino_core_lib/slog.h" +#include "inference_engine.hpp" +#include "opencv2/opencv.hpp" +#include "vino_sample/utility.hpp" +#include "vino_people_msgs/HumanPoseSrv.h" + +bool parseAndCheckCommandLine(int argc, char** argv) +{ + // -----Parsing and validation of input args--------------------------- + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_h) + { + showUsageForParam(); + return false; + } + + return true; +} + +int main(int argc, char** argv) +{ + ros::init(argc, argv, "human_pose_server"); + + if (!parseAndCheckCommandLine(argc, argv)) return 0; + + ros::param::param("~param_file", FLAGS_config, "/param/pipeline_pose.yaml"); + + slog::info << "FLAGS_config=" << FLAGS_config << slog::endl; + + std::string service_name = "/openvino_toolkit/service"; + slog::info << "service name = " << service_name << slog::endl; + // ----- Parsing and validation of input args----------------------- + + + auto node = 
std::make_shared>(service_name, FLAGS_config); + + slog::info << "Waiting for service request..." << slog::endl; + ros::spin(); + slog::info << "--------------End of Execution--------------" << FLAGS_config << slog::endl; + +} From 041534e93abf3b9e1c85be0f0b81bbf341554e7c Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Fri, 17 Apr 2020 14:46:54 -0400 Subject: [PATCH 08/11] Adds missing model verification. --- .../models/human_pose_estimation_model.cpp | 120 ++++++++++++------ 1 file changed, 80 insertions(+), 40 deletions(-) diff --git a/vino_core_lib/src/models/human_pose_estimation_model.cpp b/vino_core_lib/src/models/human_pose_estimation_model.cpp index bf1f8648..0875086a 100644 --- a/vino_core_lib/src/models/human_pose_estimation_model.cpp +++ b/vino_core_lib/src/models/human_pose_estimation_model.cpp @@ -1,25 +1,11 @@ // TODO add license -// TODO Remove these comments /* -Model URL: https://docs.openvinotoolkit.org/latest/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html - -# Inputs - -Shape: [1x3x256x456]. An input image in the [BxCxHxW] format , where: - B - batch size - C - number of channels - H - image height - W - image width. Expected color order is BGR. - -# Outputs - -The net outputs two blobs with the [1, 38, 32, 57] and [1, 19, 32, 57] shapes. -The first blob contains keypoint pairwise relations (part affinity fields), -while the second blob contains keypoint heatmaps. 
+Model URL: https://docs.openvinotoolkit.org/2019_R3.1/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html */ #include +#include #include "vino_core_lib/models/human_pose_estimation_model.h" #include "vino_core_lib/slog.h" @@ -60,34 +46,88 @@ void Models::HumanPoseEstimationModel::setLayerProperty( void Models::HumanPoseEstimationModel::checkLayerProperty( const InferenceEngine::CNNNetReader::Ptr& net_reader) { - slog::info << "Checking Age Gender Detection outputs" << slog::endl; + slog::info << "Checking Human Pose Estimation outputs" << slog::endl; InferenceEngine::OutputsDataMap output_info( net_reader->getNetwork().getOutputsInfo()); auto it = output_info.begin(); - InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; + InferenceEngine::DataPtr paf_output_ptr = (it++)->second; InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; - // output layer of age should be Convolution type - // TODO check if the output types are correct. -// if (heatmap_output_ptr->getCreatorLayer().lock()->type == "Convolution") -// { -// std::swap(keypoints_output_ptr, heatmap_output_ptr); -// } -// if (keypoints_output_ptr->getCreatorLayer().lock()->type != "Convolution") -// { -// throw std::logic_error("In Age Gender network, age layer (" + -// keypoints_output_ptr->getCreatorLayer().lock()->name + -// ") should be a Convolution, but was: " + -// keypoints_output_ptr->getCreatorLayer().lock()->type); -// } -// if (heatmap_output_ptr->getCreatorLayer().lock()->type != "SoftMax") -// { -// throw std::logic_error("In Age Gender network, gender layer (" + -// heatmap_output_ptr->getCreatorLayer().lock()->name + -// ") should be a SoftMax, but was: " + -// heatmap_output_ptr->getCreatorLayer().lock()->type); -// } - slog::info << "Keypoints layer: " - << keypoints_output_ptr->getCreatorLayer().lock()->name + + if (paf_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation 
network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + paf_output_ptr->getCreatorLayer().lock()->type); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation network, heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + heatmap_output_ptr->getCreatorLayer().lock()->type); + } + + if (paf_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(paf_output_ptr->getCreatorLayer().lock()->outData.size())); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(heatmap_output_ptr->getCreatorLayer().lock()->outData.size())); + } + + if (paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims.size() == 4 && + paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims[2] == 19) + { + std::swap(paf_output_ptr, heatmap_output_ptr); + } + + auto pafDims = paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims; + auto heatDims = heatmap_output_ptr->getCreatorLayer().lock()->outData[0]->dims; + + if (pafDims.size() != 4 || pafDims[0] != 57 || pafDims[1] != 32 + || pafDims[2] != 38 || pafDims[3] != 1) + { + std::ostringstream size; + size << "[ "; + for (size_t s : pafDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 38 1 ], but had: " + size.str()); + } + + if (heatDims.size() != 4 || 
heatDims[0] != 57 || heatDims[1] != 32 + || heatDims[2] != 19 || heatDims[3] != 1) + { + std::ostringstream size; + size << "[ "; + for (size_t s : heatDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 19 1 ], but had: " + size.str()); + } + + slog::info << "PAF layer: " + << paf_output_ptr->getCreatorLayer().lock()->name << slog::endl; slog::info << "Heatmap layer: " << heatmap_output_ptr->getCreatorLayer().lock()->name From bb1ca3f4c19f081fb87a37ed794ac92d6d353202 Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Fri, 17 Apr 2020 18:50:55 -0400 Subject: [PATCH 09/11] Fixes bugs and adds doc and parameter support. --- .../inferences/human_pose_estimation.h | 68 +- .../include/vino_core_lib/inferences/peak.h | 96 +-- .../src/inferences/human_pose_estimation.cpp | 175 +++--- vino_core_lib/src/inferences/peak.cpp | 588 ++++++++++-------- vino_core_lib/src/pipeline_manager.cpp | 12 +- vino_launch/param/pipeline_pose.yaml | 32 +- .../include/vino_param_lib/param_manager.h | 7 + vino_param_lib/src/param_manager.cpp | 12 + 8 files changed, 536 insertions(+), 454 deletions(-) diff --git a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h index 4b0b8c1f..0a225556 100644 --- a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h +++ b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h @@ -68,7 +68,11 @@ class HumanPoseEstimation : public BaseInference { public: using Result = vino_core_lib::HumanPoseResult; - HumanPoseEstimation(); + HumanPoseEstimation(float minPeaksDistance, + float midPointsScoreThreshold, + float foundMidPointsRatioThreshold, + int minJointsNumber, + float minSubsetScore); ~HumanPoseEstimation() override; /** * @brief Load the age gender detection 
model. @@ -120,20 +124,20 @@ class HumanPoseEstimation : public BaseInference private: /** - * @brief + * @brief Processess the network's outputs to extract the valid poses. * * Copied from: https://github.com/opencv/open_model_zoo/blob/master/demos/human_pose_estimation_demo/src/human_pose_estimator.cpp * - * @param heatMapsData - * @param heatMapOffset - * @param nHeatMaps - * @param pafsData - * @param pafOffset - * @param nPafs - * @param featureMapWidth - * @param featureMapHeight - * @param imageSize - * @return std::vector + * @param heatMapsData Data outputted by the heatmap network. + * @param heatMapOffset Size of each heatmap result. + * @param nHeatMaps Number of keypoints. + * @param pafsData Data outputted by the PAF network. + * @param pafOffset Size of each PAF result. + * @param nPafs Numver of PAFs. + * @param featureMapWidth Width of the heatmap. + * @param featureMapHeight Height of the heatmap. + * @param imageSize Size of the input image. + * @return std::vector A vector with the detected poses. */ std::vector postprocess( const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, @@ -141,16 +145,40 @@ class HumanPoseEstimation : public BaseInference const int featureMapWidth, const int featureMapHeight, const cv::Size& imageSize) const; + /** + * @brief Resizes the heatmap by upSampleRatio. + * + * @param featureMaps A vector with the heatmaps to resize. + */ void resizeFeatureMaps(std::vector& featureMaps) const; + /** + * @brief Extracts the poses from the given heatmaps and PAFs. + * + * @param heatMaps Postprocessed heatmaps. + * @param pafs Postprocessed PAFs. + * @return std::vector The detected poses. + */ std::vector extractPoses( const std::vector& heatMaps, const std::vector& pafs) const; + /** + * @brief Aligns the poses' keypoints to the input image. + * + * @param poses Poses (extracted from the heatmaps and PAFs). + * @param featureMapsSize The size of the heatmaps. + * @param imageSize The input image size. 
+ */ void correctCoordinates(std::vector& poses, const cv::Size& featureMapsSize, const cv::Size& imageSize) const; + /** + * @brief Correct the bonding boxes based on the poses' keypoints. + * + * @param poses Poses (with corrected keypoints). + */ void correctROI(std::vector& poses) const; std::shared_ptr valid_model_; @@ -158,17 +186,15 @@ class HumanPoseEstimation : public BaseInference int width_ = 0; int height_ = 0; - // TODO add doc - int upsampleRatio_; // = 4; - float minPeaksDistance_; // = 3.0f; const size_t keypointsNumber_ = 18; - float midPointsScoreThreshold_; // = 0.05f; - float foundMidPointsRatioThreshold_; // = 0.8f; - int minJointsNumber_; // = 3; - float minSubsetScore_; // = 0.2f; - int stride_; // = 8; + int upsampleRatio_; + float minPeaksDistance_; + float midPointsScoreThreshold_; + float foundMidPointsRatioThreshold_; + int minJointsNumber_; + float minSubsetScore_; + int stride_; cv::Vec4i pad_; - cv::Size imageSize_; // = image.size(); }; } // namespace vino_core_lib diff --git a/vino_core_lib/include/vino_core_lib/inferences/peak.h b/vino_core_lib/include/vino_core_lib/inferences/peak.h index 20b1ac49..e0310ce8 100644 --- a/vino_core_lib/include/vino_core_lib/inferences/peak.h +++ b/vino_core_lib/include/vino_core_lib/inferences/peak.h @@ -16,66 +16,72 @@ namespace human_pose_estimation using Result = vino_core_lib::HumanPoseResult; -struct Peak { - Peak(const int id = -1, - const cv::Point2f& pos = cv::Point2f(), - const float score = 0.0f); - - int id; - cv::Point2f pos; - float score; +struct Peak +{ + Peak(const int id = -1, + const cv::Point2f &pos = cv::Point2f(), + const float score = 0.0f); + + int id; + cv::Point2f pos; + float score; }; -struct HumanPoseByPeaksIndices { - explicit HumanPoseByPeaksIndices(const int keypointsNumber); +struct HumanPoseByPeaksIndices +{ + explicit HumanPoseByPeaksIndices(const int keypointsNumber); - std::vector peaksIndices; - int nJoints; - float score; + std::vector peaksIndices; + int 
nJoints; + float score; }; -struct TwoJointsConnection { - TwoJointsConnection(const int firstJointIdx, - const int secondJointIdx, - const float score); +struct TwoJointsConnection +{ + TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score); - int firstJointIdx; - int secondJointIdx; - float score; + int firstJointIdx; + int secondJointIdx; + float score; }; -void findPeaks(const std::vector& heatMaps, +void findPeaks(const std::vector &heatMaps, const float minPeaksDistance, - std::vector >& allPeaks, + std::vector> &allPeaks, int heatMapId); std::vector groupPeaksToPoses( - const std::vector >& allPeaks, - const std::vector& pafs, - const size_t keypointsNumber, - const float midPointsScoreThreshold, - const float foundMidPointsRatioThreshold, - const int minJointsNumber, - const float minSubsetScore); - -class FindPeaksBody: public cv::ParallelLoopBody { + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); + +class FindPeaksBody : public cv::ParallelLoopBody +{ public: - FindPeaksBody(const std::vector& heatMaps, float minPeaksDistance, - std::vector >& peaksFromHeatMap) - : heatMaps(heatMaps), - minPeaksDistance(minPeaksDistance), - peaksFromHeatMap(peaksFromHeatMap) {} - - virtual void operator()(const cv::Range& range) const { - for (int i = range.start; i < range.end; i++) { - findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i); - } + FindPeaksBody(const std::vector &heatMaps, float minPeaksDistance, + std::vector> &peaksFromHeatMap) + : heatMaps(heatMaps), + minPeaksDistance(minPeaksDistance), + peaksFromHeatMap(peaksFromHeatMap) {} + + virtual void operator()(const cv::Range &range) const + { + for (int i = range.start; i < range.end; i++) + { + findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i); } + } private: - const 
std::vector& heatMaps; - float minPeaksDistance; - std::vector >& peaksFromHeatMap; + const std::vector &heatMaps; + float minPeaksDistance; + std::vector> &peaksFromHeatMap; }; -} // namespace human_pose_estimation +} // namespace human_pose_estimation diff --git a/vino_core_lib/src/inferences/human_pose_estimation.cpp b/vino_core_lib/src/inferences/human_pose_estimation.cpp index 0e414f5c..b74f4b69 100644 --- a/vino_core_lib/src/inferences/human_pose_estimation.cpp +++ b/vino_core_lib/src/inferences/human_pose_estimation.cpp @@ -32,14 +32,19 @@ vino_core_lib::HumanPoseResult::HumanPoseResult( } // HumanPoseEstimation -vino_core_lib::HumanPoseEstimation::HumanPoseEstimation() - : vino_core_lib::BaseInference(), +vino_core_lib::HumanPoseEstimation::HumanPoseEstimation( + float minPeaksDistance, + float midPointsScoreThreshold, + float foundMidPointsRatioThreshold, + int minJointsNumber, + float minSubsetScore) + : vino_core_lib::BaseInference(), upsampleRatio_(4), - minPeaksDistance_(3.0f), - midPointsScoreThreshold_(0.05f), - foundMidPointsRatioThreshold_(0.8f), - minJointsNumber_(3), - minSubsetScore_(0.2f), + minPeaksDistance_(minPeaksDistance), + midPointsScoreThreshold_(midPointsScoreThreshold), + foundMidPointsRatioThreshold_(foundMidPointsRatioThreshold), + minJointsNumber_(minJointsNumber), + minSubsetScore_(minSubsetScore), stride_(8), pad_(cv::Vec4i::all(0)) { @@ -58,17 +63,12 @@ bool vino_core_lib::HumanPoseEstimation::enqueue( const cv::Mat& frame, const cv::Rect& input_frame_loc) { // object_detection.cpp - if (width_ == 0 && height_ == 0) // TODO do we need this? + if (width_ == 0 && height_ == 0) { width_ = frame.cols; height_ = frame.rows; } - // TODO verify this. - imageSize_ = frame.size(); - - //TODO implement preprocessing? 
- if (!vino_core_lib::BaseInference::enqueue( frame, input_frame_loc, 1, 0, valid_model_->getInputName())) { @@ -95,10 +95,6 @@ bool vino_core_lib::HumanPoseEstimation::fetchResults() InferenceEngine::Blob::Ptr heatmapBlob = request->GetBlob(valid_model_->getOutputHeatmapName()); - //TODO implement postprocessing function. - - // At this point, keypointsBlob has the [1,38,32,57] output - results_.clear(); CV_Assert(heatmapBlob->getTensorDesc().getDims()[1] == keypointsNumber_ + 1); InferenceEngine::SizeVector heatMapDims = @@ -111,15 +107,7 @@ bool vino_core_lib::HumanPoseEstimation::fetchResults() keypointsBlob->buffer(), heatMapDims[2] * heatMapDims[3], keypointsBlob->getTensorDesc().getDims()[1], - heatMapDims[3], heatMapDims[2], imageSize_); - - slog::info << results_.size() << " people detected" << slog::endl; - - // for (size_t i = 0; i < results_.size(); ++i) - // { - // results_[i].age_ = ageBlob->buffer().as()[i] * 100; - // results_[i].male_prob_ = genderBlob->buffer().as()[i * 2 + 1]; - // } + heatMapDims[3], heatMapDims[2], cv::Size(width_, height_)); return true; } @@ -156,81 +144,93 @@ std::vector vino_core_lib::HumanPoseEstimation::postprocess( const int featureMapWidth, const int featureMapHeight, const cv::Size& imageSize) const { - std::vector heatMaps(nHeatMaps); - for (size_t i = 0; i < heatMaps.size(); i++) { - heatMaps[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, - reinterpret_cast( - const_cast( - heatMapsData + i * heatMapOffset))); - } - resizeFeatureMaps(heatMaps); - - std::vector pafs(nPafs); - for (size_t i = 0; i < pafs.size(); i++) { - pafs[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + std::vector heatMaps(nHeatMaps); + for (size_t i = 0; i < heatMaps.size(); i++) + { + heatMaps[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, reinterpret_cast( - const_cast( - pafsData + i * pafOffset))); - } - resizeFeatureMaps(pafs); + const_cast( + heatMapsData + i * heatMapOffset))); + } + + 
resizeFeatureMaps(heatMaps); + + std::vector pafs(nPafs); + for (size_t i = 0; i < pafs.size(); i++) + { + pafs[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + pafsData + i * pafOffset))); + } + resizeFeatureMaps(pafs); - std::vector poses = extractPoses(heatMaps, pafs); - correctCoordinates(poses, heatMaps[0].size(), imageSize); - correctROI(poses); - return poses; + std::vector poses = extractPoses(heatMaps, pafs); + correctCoordinates(poses, heatMaps[0].size(), imageSize); + correctROI(poses); + return poses; } std::vector vino_core_lib::HumanPoseEstimation::extractPoses( const std::vector& heatMaps, - const std::vector& pafs) const { - std::vector > peaksFromHeatMap(heatMaps.size()); - human_pose_estimation::FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance_, peaksFromHeatMap); - cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), - findPeaksBody); - int peaksBefore = 0; - for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) { - peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); - for (auto& peak : peaksFromHeatMap[heatmapId]) { - peak.id += peaksBefore; - } + const std::vector& pafs) const +{ + std::vector> peaksFromHeatMap(heatMaps.size()); + human_pose_estimation::FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance_, peaksFromHeatMap); + cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), findPeaksBody); + int peaksBefore = 0; + for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) + { + peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); + for (auto& peak : peaksFromHeatMap[heatmapId]) + { + peak.id += peaksBefore; } - std::vector poses = groupPeaksToPoses( - peaksFromHeatMap, pafs, keypointsNumber_, midPointsScoreThreshold_, - foundMidPointsRatioThreshold_, minJointsNumber_, minSubsetScore_); - return poses; + } + std::vector poses = groupPeaksToPoses( + peaksFromHeatMap, pafs, keypointsNumber_, midPointsScoreThreshold_, 
+ foundMidPointsRatioThreshold_, minJointsNumber_, minSubsetScore_); + return poses; } -void vino_core_lib::HumanPoseEstimation::resizeFeatureMaps(std::vector& featureMaps) const { - for (auto& featureMap : featureMaps) { - cv::resize(featureMap, featureMap, cv::Size(), - upsampleRatio_, upsampleRatio_, cv::INTER_CUBIC); - } +void vino_core_lib::HumanPoseEstimation::resizeFeatureMaps( + std::vector& featureMaps) const +{ + for (auto& featureMap : featureMaps) + { + cv::resize(featureMap, featureMap, cv::Size(), + upsampleRatio_, upsampleRatio_, cv::INTER_CUBIC); + } } void vino_core_lib::HumanPoseEstimation::correctCoordinates(std::vector& poses, const cv::Size& featureMapsSize, - const cv::Size& imageSize) const { - CV_Assert(stride_ % upsampleRatio_ == 0); - - cv::Size fullFeatureMapSize = featureMapsSize * stride_ / upsampleRatio_; + const cv::Size& imageSize) const +{ + CV_Assert(stride_ % upsampleRatio_ == 0); - float scaleX = imageSize.width / - static_cast(fullFeatureMapSize.width - pad_(1) - pad_(3)); - float scaleY = imageSize.height / - static_cast(fullFeatureMapSize.height - pad_(0) - pad_(2)); - for (auto& pose : poses) { - for (auto& keypoint : pose.keypoints) { - if (keypoint != cv::Point2f(-1, -1)) { - keypoint.x *= stride_ / upsampleRatio_; - keypoint.x -= pad_(1); - keypoint.x *= scaleX; + cv::Size fullFeatureMapSize = featureMapsSize * stride_ / upsampleRatio_; - keypoint.y *= stride_ / upsampleRatio_; - keypoint.y -= pad_(0); - keypoint.y *= scaleY; - } - } + float scaleX = imageSize.width / + static_cast(fullFeatureMapSize.width - pad_(1) - pad_(3)); + float scaleY = imageSize.height / + static_cast(fullFeatureMapSize.height - pad_(0) - pad_(2)); + for (auto& pose : poses) + { + for (auto& keypoint : pose.keypoints) + { + if (keypoint != cv::Point2f(-1, -1)) + { + keypoint.x *= stride_ / upsampleRatio_; + keypoint.x -= pad_(1); + keypoint.x *= scaleX; + + keypoint.y *= stride_ / upsampleRatio_; + keypoint.y -= pad_(0); + keypoint.y *= scaleY; + 
} } + } } void vino_core_lib::HumanPoseEstimation::correctROI( @@ -250,11 +250,12 @@ void vino_core_lib::HumanPoseEstimation::correctROI( int y = static_cast(kp.y); if (x > xMax) xMax = x; - else if (x < xMin) xMin = x; + if (x < xMin) xMin = x; if (y > yMax) yMax = y; - else if (y < yMin) yMin = y; + if (y < yMin) yMin = y; } + // slog::info << "rect at: (" << xMin << ", " << yMin << "), (" << xMax << ", " << yMax << ")" << slog::endl; cv::Rect newLocation = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin); pose.setLocation(newLocation); } diff --git a/vino_core_lib/src/inferences/peak.cpp b/vino_core_lib/src/inferences/peak.cpp index 6904e921..5968509f 100644 --- a/vino_core_lib/src/inferences/peak.cpp +++ b/vino_core_lib/src/inferences/peak.cpp @@ -8,8 +8,9 @@ #include "vino_core_lib/inferences/peak.h" -namespace human_pose_estimation { -Peak::Peak(const int id, const cv::Point2f& pos, const float score) +namespace human_pose_estimation +{ +Peak::Peak(const int id, const cv::Point2f &pos, const float score) : id(id), pos(pos), score(score) {} @@ -26,302 +27,343 @@ TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, secondJointIdx(secondJointIdx), score(score) {} -void findPeaks(const std::vector& heatMaps, +void findPeaks(const std::vector &heatMaps, const float minPeaksDistance, - std::vector >& allPeaks, - int heatMapId) { - const float threshold = 0.1f; - std::vector peaks; - const cv::Mat& heatMap = heatMaps[heatMapId]; - const float* heatMapData = heatMap.ptr(); - size_t heatMapStep = heatMap.step1(); - for (int y = -1; y < heatMap.rows + 1; y++) { - for (int x = -1; x < heatMap.cols + 1; x++) { - float val = 0; - if (x >= 0 - && y >= 0 - && x < heatMap.cols - && y < heatMap.rows) { - val = heatMapData[y * heatMapStep + x]; - val = val >= threshold ? 
val : 0; - } + std::vector> &allPeaks, + int heatMapId) +{ + const float threshold = 0.1f; + std::vector peaks; + const cv::Mat &heatMap = heatMaps[heatMapId]; + const float *heatMapData = heatMap.ptr(); + size_t heatMapStep = heatMap.step1(); + for (int y = -1; y < heatMap.rows + 1; y++) + { + for (int x = -1; x < heatMap.cols + 1; x++) + { + float val = 0; + if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) + { + val = heatMapData[y * heatMapStep + x]; + val = val >= threshold ? val : 0; + } - float left_val = 0; - if (y >= 0 - && x < (heatMap.cols - 1) - && y < heatMap.rows) { - left_val = heatMapData[y * heatMapStep + x + 1]; - left_val = left_val >= threshold ? left_val : 0; - } + float left_val = 0; + if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) + { + left_val = heatMapData[y * heatMapStep + x + 1]; + left_val = left_val >= threshold ? left_val : 0; + } - float right_val = 0; - if (x > 0 - && y >= 0 - && y < heatMap.rows) { - right_val = heatMapData[y * heatMapStep + x - 1]; - right_val = right_val >= threshold ? right_val : 0; - } + float right_val = 0; + if (x > 0 && y >= 0 && y < heatMap.rows) + { + right_val = heatMapData[y * heatMapStep + x - 1]; + right_val = right_val >= threshold ? right_val : 0; + } - float top_val = 0; - if (x >= 0 - && x < heatMap.cols - && y < (heatMap.rows - 1)) { - top_val = heatMapData[(y + 1) * heatMapStep + x]; - top_val = top_val >= threshold ? top_val : 0; - } + float top_val = 0; + if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) + { + top_val = heatMapData[(y + 1) * heatMapStep + x]; + top_val = top_val >= threshold ? top_val : 0; + } - float bottom_val = 0; - if (x >= 0 - && y > 0 - && x < heatMap.cols) { - bottom_val = heatMapData[(y - 1) * heatMapStep + x]; - bottom_val = bottom_val >= threshold ? bottom_val : 0; - } + float bottom_val = 0; + if (x >= 0 && y > 0 && x < heatMap.cols) + { + bottom_val = heatMapData[(y - 1) * heatMapStep + x]; + bottom_val = bottom_val >= threshold ? 
bottom_val : 0; + } - if ((val > left_val) - && (val > right_val) - && (val > top_val) - && (val > bottom_val)) { - peaks.push_back(cv::Point(x, y)); - } - } + if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) + { + peaks.push_back(cv::Point(x, y)); + } } - std::sort(peaks.begin(), peaks.end(), [](const cv::Point& a, const cv::Point& b) { - return a.x < b.x; - }); - std::vector isActualPeak(peaks.size(), true); - int peakCounter = 0; - std::vector& peaksWithScoreAndID = allPeaks[heatMapId]; - for (size_t i = 0; i < peaks.size(); i++) { - if (isActualPeak[i]) { - for (size_t j = i + 1; j < peaks.size(); j++) { - if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + - (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) { - isActualPeak[j] = false; - } - } - peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at(peaks[i]))); + } + std::sort(peaks.begin(), peaks.end(), [](const cv::Point &a, const cv::Point &b) { + return a.x < b.x; + }); + std::vector isActualPeak(peaks.size(), true); + int peakCounter = 0; + std::vector &peaksWithScoreAndID = allPeaks[heatMapId]; + for (size_t i = 0; i < peaks.size(); i++) + { + if (isActualPeak[i]) + { + for (size_t j = i + 1; j < peaks.size(); j++) + { + if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + + (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) + { + isActualPeak[j] = false; } + } + peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at(peaks[i]))); } + } } -std::vector groupPeaksToPoses(const std::vector >& allPeaks, - const std::vector& pafs, - const size_t keypointsNumber, - const float midPointsScoreThreshold, - const float foundMidPointsRatioThreshold, - const int minJointsNumber, - const float minSubsetScore) { - const std::vector > limbIdsHeatmap = { - {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, - {2, 1}, {1, 15}, {15, 
17}, {1, 16}, {16, 18}, {3, 17}, {6, 18} - }; - const std::vector > limbIdsPaf = { - {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, {21, 22}, {23, 24}, {25, 26}, - {27, 28}, {29, 30}, {47, 48}, {49, 50}, {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46} - }; +std::vector groupPeaksToPoses( + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore) +{ + const std::vector> limbIdsHeatmap = { + {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, + {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, {2, 1}, + {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18}}; + const std::vector> limbIdsPaf = { + {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, + {21, 22}, {23, 24}, {25, 26}, {27, 28}, {29, 30}, {47, 48}, {49, 50}, + {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46}}; - std::vector candidates; - for (const auto& peaks : allPeaks) { - candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + std::vector candidates; + for (const auto &peaks : allPeaks) + { + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + } + std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t k = 0; k < limbIdsPaf.size(); k++) + { + std::vector connections; + const int mapIdxOffset = keypointsNumber + 1; + std::pair scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset]}; + const int idxJointA = limbIdsHeatmap[k].first - 1; + const int idxJointB = limbIdsHeatmap[k].second - 1; + const std::vector &candA = allPeaks[idxJointA]; + const std::vector &candB = allPeaks[idxJointB]; + const size_t nJointsA = candA.size(); + const size_t nJointsB = candB.size(); + if (nJointsA == 0 && nJointsB == 0) + { + continue; } - std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); - for (size_t k 
= 0; k < limbIdsPaf.size(); k++) { - std::vector connections; - const int mapIdxOffset = keypointsNumber + 1; - std::pair scoreMid = { pafs[limbIdsPaf[k].first - mapIdxOffset], - pafs[limbIdsPaf[k].second - mapIdxOffset] }; - const int idxJointA = limbIdsHeatmap[k].first - 1; - const int idxJointB = limbIdsHeatmap[k].second - 1; - const std::vector& candA = allPeaks[idxJointA]; - const std::vector& candB = allPeaks[idxJointB]; - const size_t nJointsA = candA.size(); - const size_t nJointsB = candB.size(); - if (nJointsA == 0 - && nJointsB == 0) { + else if (nJointsA == 0) + { + for (size_t i = 0; i < nJointsB; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointB] == candB[i].id) + { + num++; continue; - } else if (nJointsA == 0) { - for (size_t i = 0; i < nJointsB; i++) { - int num = 0; - for (size_t j = 0; j < subset.size(); j++) { - if (subset[j].peaksIndices[idxJointB] == candB[i].id) { - num++; - continue; - } - } - if (num == 0) { - HumanPoseByPeaksIndices personKeypoints(keypointsNumber); - personKeypoints.peaksIndices[idxJointB] = candB[i].id; - personKeypoints.nJoints = 1; - personKeypoints.score = candB[i].score; - subset.push_back(personKeypoints); - } - } - continue; - } else if (nJointsB == 0) { - for (size_t i = 0; i < nJointsA; i++) { - int num = 0; - for (size_t j = 0; j < subset.size(); j++) { - if (subset[j].peaksIndices[idxJointA] == candA[i].id) { - num++; - continue; - } - } - if (num == 0) { - HumanPoseByPeaksIndices personKeypoints(keypointsNumber); - personKeypoints.peaksIndices[idxJointA] = candA[i].id; - personKeypoints.nJoints = 1; - personKeypoints.score = candA[i].score; - subset.push_back(personKeypoints); - } - } + } + } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointB] = candB[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candB[i].score; + subset.push_back(personKeypoints); + } + } + 
continue; + } + else if (nJointsB == 0) + { + for (size_t i = 0; i < nJointsA; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == candA[i].id) + { + num++; continue; + } } - - std::vector tempJointConnections; - for (size_t i = 0; i < nJointsA; i++) { - for (size_t j = 0; j < nJointsB; j++) { - cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5; - cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y)); - cv::Point2f vec = candB[j].pos - candA[i].pos; - double norm_vec = cv::norm(vec); - if (norm_vec == 0) { - continue; - } - vec /= norm_vec; - float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); - int height_n = pafs[0].rows / 2; - float suc_ratio = 0.0f; - float mid_score = 0.0f; - const int mid_num = 10; - const float scoreThreshold = -100.0f; - if (score > scoreThreshold) { - float p_sum = 0; - int p_count = 0; - cv::Size2f step((candB[j].pos.x - candA[i].pos.x)/(mid_num - 1), - (candB[j].pos.y - candA[i].pos.y)/(mid_num - 1)); - for (int n = 0; n < mid_num; n++) { - cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), - cvRound(candA[i].pos.y + n * step.height)); - cv::Point2f pred(scoreMid.first.at(midPoint), - scoreMid.second.at(midPoint)); - score = vec.x * pred.x + vec.y * pred.y; - if (score > midPointsScoreThreshold) { - p_sum += score; - p_count++; - } - } - suc_ratio = static_cast(p_count / mid_num); - float ratio = p_count > 0 ? 
p_sum / p_count : 0.0f; - mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); - } - if (mid_score > 0 - && suc_ratio > foundMidPointsRatioThreshold) { - tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); - } - } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointA] = candA[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candA[i].score; + subset.push_back(personKeypoints); } - if (!tempJointConnections.empty()) { - std::sort(tempJointConnections.begin(), tempJointConnections.end(), - [](const TwoJointsConnection& a, - const TwoJointsConnection& b) { - return (a.score > b.score); - }); + } + continue; + } + + std::vector tempJointConnections; + for (size_t i = 0; i < nJointsA; i++) + { + for (size_t j = 0; j < nJointsB; j++) + { + cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5; + cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y)); + cv::Point2f vec = candB[j].pos - candA[i].pos; + double norm_vec = cv::norm(vec); + if (norm_vec == 0) + { + continue; } - size_t num_limbs = std::min(nJointsA, nJointsB); - size_t cnt = 0; - std::vector occurA(nJointsA, 0); - std::vector occurB(nJointsB, 0); - for (size_t row = 0; row < tempJointConnections.size(); row++) { - if (cnt == num_limbs) { - break; - } - const int& indexA = tempJointConnections[row].firstJointIdx; - const int& indexB = tempJointConnections[row].secondJointIdx; - const float& score = tempJointConnections[row].score; - if (occurA[indexA] == 0 - && occurB[indexB] == 0) { - connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); - cnt++; - occurA[indexA] = 1; - occurB[indexB] = 1; + vec /= norm_vec; + float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); + int height_n = pafs[0].rows / 2; + float suc_ratio = 0.0f; + float mid_score = 0.0f; + const int mid_num = 10; + const float scoreThreshold = -100.0f; + if (score > 
scoreThreshold) + { + float p_sum = 0; + int p_count = 0; + cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1), + (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1)); + for (int n = 0; n < mid_num; n++) + { + cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), + cvRound(candA[i].pos.y + n * step.height)); + cv::Point2f pred(scoreMid.first.at(midPoint), + scoreMid.second.at(midPoint)); + score = vec.x * pred.x + vec.y * pred.y; + if (score > midPointsScoreThreshold) + { + p_sum += score; + p_count++; } + } + suc_ratio = static_cast(p_count / mid_num); + float ratio = p_count > 0 ? p_sum / p_count : 0.0f; + mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); } - if (connections.empty()) { - continue; + if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) + { + tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); } + } + } + if (!tempJointConnections.empty()) + { + std::sort(tempJointConnections.begin(), tempJointConnections.end(), + [](const TwoJointsConnection &a, + const TwoJointsConnection &b) { + return (a.score > b.score); + }); + } + size_t num_limbs = std::min(nJointsA, nJointsB); + size_t cnt = 0; + std::vector occurA(nJointsA, 0); + std::vector occurB(nJointsB, 0); + for (size_t row = 0; row < tempJointConnections.size(); row++) + { + if (cnt == num_limbs) + { + break; + } + const int &indexA = tempJointConnections[row].firstJointIdx; + const int &indexB = tempJointConnections[row].secondJointIdx; + const float &score = tempJointConnections[row].score; + if (occurA[indexA] == 0 && occurB[indexB] == 0) + { + connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); + cnt++; + occurA[indexA] = 1; + occurB[indexB] = 1; + } + } + if (connections.empty()) + { + continue; + } - bool extraJointConnections = (k == 17 || k == 18); - if (k == 0) { - subset = std::vector( - connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); - for (size_t i = 0; i < 
connections.size(); i++) { - const int& indexA = connections[i].firstJointIdx; - const int& indexB = connections[i].secondJointIdx; - subset[i].peaksIndices[idxJointA] = indexA; - subset[i].peaksIndices[idxJointB] = indexB; - subset[i].nJoints = 2; - subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score; - } - } else if (extraJointConnections) { - for (size_t i = 0; i < connections.size(); i++) { - const int& indexA = connections[i].firstJointIdx; - const int& indexB = connections[i].secondJointIdx; - for (size_t j = 0; j < subset.size(); j++) { - if (subset[j].peaksIndices[idxJointA] == indexA - && subset[j].peaksIndices[idxJointB] == -1) { - subset[j].peaksIndices[idxJointB] = indexB; - } else if (subset[j].peaksIndices[idxJointB] == indexB - && subset[j].peaksIndices[idxJointA] == -1) { - subset[j].peaksIndices[idxJointA] = indexA; - } - } - } - continue; - } else { - for (size_t i = 0; i < connections.size(); i++) { - const int& indexA = connections[i].firstJointIdx; - const int& indexB = connections[i].secondJointIdx; - bool num = false; - for (size_t j = 0; j < subset.size(); j++) { - if (subset[j].peaksIndices[idxJointA] == indexA) { - subset[j].peaksIndices[idxJointB] = indexB; - subset[j].nJoints++; - subset[j].score += candidates[indexB].score + connections[i].score; - num = true; - } - } - if (!num) { - HumanPoseByPeaksIndices hpWithScore(keypointsNumber); - hpWithScore.peaksIndices[idxJointA] = indexA; - hpWithScore.peaksIndices[idxJointB] = indexB; - hpWithScore.nJoints = 2; - hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; - subset.push_back(hpWithScore); - } - } + bool extraJointConnections = (k == 17 || k == 18); + if (k == 0) + { + subset = std::vector( + connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = 
connections[i].secondJointIdx; + subset[i].peaksIndices[idxJointA] = indexA; + subset[i].peaksIndices[idxJointB] = indexB; + subset[i].nJoints = 2; + subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + } + } + else if (extraJointConnections) + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) + { + subset[j].peaksIndices[idxJointB] = indexB; + } + else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) + { + subset[j].peaksIndices[idxJointA] = indexA; + } } + } + continue; } - std::vector poses; - for (const auto& subsetI : subset) { - if (subsetI.nJoints < minJointsNumber - || subsetI.score / subsetI.nJoints < minSubsetScore) { - continue; + else + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + bool num = false; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA) + { + subset[j].peaksIndices[idxJointB] = indexB; + subset[j].nJoints++; + subset[j].score += candidates[indexB].score + connections[i].score; + num = true; + } } - int position = -1; - Result pose(cv::Rect(), // TODO get the correct Rect, which can be the max/min x y points - std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), - subsetI.score * std::max(0, subsetI.nJoints - 1)); - for (const auto& peakIdx : subsetI.peaksIndices) { - position++; - if (peakIdx >= 0) { - pose.keypoints[position] = candidates[peakIdx].pos; - pose.keypoints[position].x += 0.5; - pose.keypoints[position].y += 0.5; - } + if (!num) + { + HumanPoseByPeaksIndices hpWithScore(keypointsNumber); + hpWithScore.peaksIndices[idxJointA] = 
indexA; + hpWithScore.peaksIndices[idxJointB] = indexB; + hpWithScore.nJoints = 2; + hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + subset.push_back(hpWithScore); } - poses.push_back(pose); + } + } + } + std::vector poses; + for (const auto &subsetI : subset) + { + if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) + { + continue; + } + int position = -1; + Result pose(cv::Rect(), + std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + subsetI.score * std::max(0, subsetI.nJoints - 1)); + for (const auto &peakIdx : subsetI.peaksIndices) + { + position++; + if (peakIdx >= 0) + { + pose.keypoints[position] = candidates[peakIdx].pos; + pose.keypoints[position].x += 0.5; + pose.keypoints[position].y += 0.5; + } } - return poses; + poses.push_back(pose); + } + return poses; } -} // namespace human_pose_estimation +} // namespace human_pose_estimation diff --git a/vino_core_lib/src/pipeline_manager.cpp b/vino_core_lib/src/pipeline_manager.cpp index f72dd69f..f4228082 100644 --- a/vino_core_lib/src/pipeline_manager.cpp +++ b/vino_core_lib/src/pipeline_manager.cpp @@ -338,8 +338,18 @@ PipelineManager::createHumanPoseEstimation( human_estimation_model->modelInit(); auto pose_engine = std::make_shared( plugins_for_devices_[infer.engine], human_estimation_model); + slog::info << "params: " << infer.min_peaks_distance << ", " << + infer.mid_points_score_threshold << ", " << + infer.found_mid_points_ratio_threshold << ", " << + infer.min_joints_number << ", " << + infer.min_subset_score << slog::endl; auto pose_inference_ptr = - std::make_shared(); //(infer.confidence_threshold); + std::make_shared( + infer.min_peaks_distance, + infer.mid_points_score_threshold, + infer.found_mid_points_ratio_threshold, + infer.min_joints_number, + infer.min_subset_score); pose_inference_ptr->loadNetwork(human_estimation_model); pose_inference_ptr->loadEngine(pose_engine); diff --git 
a/vino_launch/param/pipeline_pose.yaml b/vino_launch/param/pipeline_pose.yaml index 1e0674ab..7ecaeccd 100644 --- a/vino_launch/param/pipeline_pose.yaml +++ b/vino_launch/param/pipeline_pose.yaml @@ -7,38 +7,16 @@ Pipelines: engine: MYRIAD label: to/be/set/xxx.labels batch: 1 - # - name: FaceDetection - # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/face-detection-adas-0001/FP16/face-detection-adas-0001.xml - # engine: MYRIAD - # label: to/be/set/xxx.labels - # batch: 1 - # - name: AgeGenderRecognition - # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml - # engine: MYRIAD - # label: to/be/set/xxx.labels - # batch: 16 - # - name: EmotionRecognition - # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml - # engine: MYRIAD - # label: to/be/set/xxx.labels - # batch: 16 - # - name: HeadPoseEstimation - # model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001.xml - # engine: MYRIAD - # label: to/be/set/xxx.labels - # batch: 16 + min_peaks_distance: 3.0 + mid_points_score_threshold: 0.05 + found_mid_points_ratio_threshold: 0.8 + min_joints_number: 3 + min_subset_score: 0.2 outputs: [ImageWindow, RosTopic, RViz] - confidence_threshold: 0.2 connects: - left: RealSenseCamera right: [HumanPoseEstimation] - left: HumanPoseEstimation right: [ImageWindow, RosTopic, RViz] - # - left: AgeGenderRecognition - # right: [ImageWindow, RosTopic] - # - left: EmotionRecognition - # right: [ImageWindow, RosTopic] - # - left: HeadPoseEstimation - # right: [ImageWindow, RosTopic] OpenvinoCommon: diff --git a/vino_param_lib/include/vino_param_lib/param_manager.h b/vino_param_lib/include/vino_param_lib/param_manager.h index 1f0a5cc3..4ab13deb 100644 --- a/vino_param_lib/include/vino_param_lib/param_manager.h 
+++ b/vino_param_lib/include/vino_param_lib/param_manager.h @@ -70,6 +70,13 @@ class ParamManager // singleton int batch; float confidence_threshold = 0.5; bool enable_roi_constraint = false; + + // Human Pose Estimation parameters. + float min_peaks_distance = 3.0; + float mid_points_score_threshold = 0.05; + float found_mid_points_ratio_threshold = 0.8; + int min_joints_number = 3; + float min_subset_score = 0.2; }; struct PipelineParams { diff --git a/vino_param_lib/src/param_manager.cpp b/vino_param_lib/src/param_manager.cpp index 3d5d2bb6..8f362a87 100644 --- a/vino_param_lib/src/param_manager.cpp +++ b/vino_param_lib/src/param_manager.cpp @@ -105,6 +105,13 @@ void operator>>(const YAML::Node& node, ParamManager::InferenceParams& infer) YAML_PARSE(node, "batch", infer.batch) YAML_PARSE(node, "confidence_threshold", infer.confidence_threshold) YAML_PARSE(node, "enable_roi_constraint", infer.enable_roi_constraint) + + // Human Pose Estimation parameters + YAML_PARSE(node, "min_peaks_distance", infer.min_peaks_distance) + YAML_PARSE(node, "mid_points_score_threshold", infer.mid_points_score_threshold) + YAML_PARSE(node, "found_mid_points_ratio_threshold", infer.found_mid_points_ratio_threshold) + YAML_PARSE(node, "min_joints_number", infer.min_joints_number) + YAML_PARSE(node, "min_subset_score", infer.min_subset_score) slog::info << "Inference Params:name=" << infer.name << slog::endl; } @@ -184,6 +191,11 @@ void ParamManager::print() const slog::info << "\t\tBatch: " << infer.batch << slog::endl; slog::info << "\t\tConfidence_threshold: " << infer.confidence_threshold << slog::endl; slog::info << "\t\tEnable_roi_constraint: " << infer.enable_roi_constraint << slog::endl; + slog::info << "\t\tMin_peaks_distance: " << infer.min_peaks_distance << slog::endl; + slog::info << "\t\tMid_points_score_threshold: " << infer.mid_points_score_threshold << slog::endl; + slog::info << "\t\tFound_mid_points_ratio_threshold: " << infer.found_mid_points_ratio_threshold << 
slog::endl; + slog::info << "\t\tMin_joints_number: " << infer.min_joints_number << slog::endl; + slog::info << "\t\tMin_subset_score: " << infer.min_subset_score << slog::endl; } slog::info << "\tConnections: " << slog::endl; From 8f5464b2f1c11fc8f7ed4a4609f3900883c1bf9f Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Mon, 20 Apr 2020 18:37:07 -0400 Subject: [PATCH 10/11] Adds scores to individual keypoints. --- .../inferences/human_pose_estimation.h | 19 ++++++++++++++++--- .../outputs/image_window_output.h | 2 +- .../src/inferences/human_pose_estimation.cpp | 2 +- vino_core_lib/src/inferences/peak.cpp | 3 ++- .../src/outputs/image_window_output.cpp | 7 ++++++- .../src/outputs/ros_topic_output.cpp | 4 ++-- vino_people_msgs/CMakeLists.txt | 1 + vino_people_msgs/msg/HumanPose.msg | 4 ++-- vino_people_msgs/msg/HumanPoseKeypoint.msg | 3 +++ vino_sample/src/image_human_pose_client.cpp | 2 +- 10 files changed, 35 insertions(+), 12 deletions(-) create mode 100644 vino_people_msgs/msg/HumanPoseKeypoint.msg diff --git a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h index 0a225556..b1bd74e6 100644 --- a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h +++ b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h @@ -20,6 +20,18 @@ namespace vino_core_lib { +class HumanPoseKeypoint : public cv::Point2f +{ +public: + HumanPoseKeypoint(float x, float y, float score) + : HumanPoseKeypoint::Point_(x, y), + score(score) {}; + HumanPoseKeypoint(cv::Point2f point) + : HumanPoseKeypoint::Point_(point), + score(0.0f) {}; // was score(score): self-initialization left the member indeterminate + float score; +}; + /** * @class HumanPoseResult * @brief Class for storing and processing age and gender detection result.
@@ -30,7 +42,7 @@ class HumanPoseResult : public Result explicit HumanPoseResult(const cv::Rect& location); HumanPoseResult( const cv::Rect& location, - const std::vector& keypoints, // = std::vector(), + const std::vector& keypoints, // = std::vector(), const float& score); // = 0); // Following similar structure of vino_core_lib/inferences/object_detection.h @@ -40,7 +52,7 @@ class HumanPoseResult : public Result * @brief Get the age keypoints of the estimated pose from the result. * @return The estimated keypoints. */ - std::vector getKeypoints() const + std::vector getKeypoints() const { return keypoints; } @@ -54,7 +66,8 @@ class HumanPoseResult : public Result return score; } - std::vector keypoints; + std::vector keypointsScores; + std::vector keypoints; float score = -1; }; diff --git a/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h b/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h index d5207f70..4fb41274 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h @@ -136,7 +136,7 @@ class ImageWindowOutput : public BaseOutput cv::Point hp_y; // for headpose, end point of yAxis cv::Point hp_zs; // for headpose, start point of zAxis cv::Point hp_ze; // for headpose, end point of zAxis - std::vector kp; // for humanpose, keypoints + std::vector kp; // for humanpose, keypoints }; std::vector outputs_; diff --git a/vino_core_lib/src/inferences/human_pose_estimation.cpp b/vino_core_lib/src/inferences/human_pose_estimation.cpp index b74f4b69..f1f9bb8a 100644 --- a/vino_core_lib/src/inferences/human_pose_estimation.cpp +++ b/vino_core_lib/src/inferences/human_pose_estimation.cpp @@ -23,7 +23,7 @@ vino_core_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) vino_core_lib::HumanPoseResult::HumanPoseResult( const cv::Rect& location, - const std::vector& keypoints, + const std::vector& keypoints, const float& score) : 
keypoints(keypoints), score(score), diff --git a/vino_core_lib/src/inferences/peak.cpp b/vino_core_lib/src/inferences/peak.cpp index 5968509f..532fb6ae 100644 --- a/vino_core_lib/src/inferences/peak.cpp +++ b/vino_core_lib/src/inferences/peak.cpp @@ -350,7 +350,7 @@ std::vector groupPeaksToPoses( } int position = -1; Result pose(cv::Rect(), - std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), subsetI.score * std::max(0, subsetI.nJoints - 1)); for (const auto &peakIdx : subsetI.peaksIndices) { @@ -360,6 +360,7 @@ std::vector groupPeaksToPoses( pose.keypoints[position] = candidates[peakIdx].pos; pose.keypoints[position].x += 0.5; pose.keypoints[position].y += 0.5; + pose.keypoints[position].score = candidates[peakIdx].score; } } poses.push_back(pose); diff --git a/vino_core_lib/src/outputs/image_window_output.cpp b/vino_core_lib/src/outputs/image_window_output.cpp index 906b780d..bcd6125d 100644 --- a/vino_core_lib/src/outputs/image_window_output.cpp +++ b/vino_core_lib/src/outputs/image_window_output.cpp @@ -356,7 +356,12 @@ void Outputs::ImageWindowOutput::decorateFrame() for (auto kp : o.kp) { if (kp.x >= 0) - cv::circle(frame_, kp, 3, cv::Scalar(255, 0, 0), 1); + { + std::string score = std::to_string(kp.score).substr(0, 3); + cv::circle(frame_, kp, 3, cv::Scalar(255, 0, 0), cv::FILLED); + cv::putText(frame_, score, kp + cv::Point2f(5, 0), + cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, o.scalar); + } } } diff --git a/vino_core_lib/src/outputs/ros_topic_output.cpp b/vino_core_lib/src/outputs/ros_topic_output.cpp index 7e2ae2a5..17f74726 100644 --- a/vino_core_lib/src/outputs/ros_topic_output.cpp +++ b/vino_core_lib/src/outputs/ros_topic_output.cpp @@ -221,12 +221,12 @@ void Outputs::RosTopicOutput::accept( hp.roi.height = loc.height; hp.score = r.getScore(); - auto p = geometry_msgs::Point(); - p.z = -1; + auto p = vino_people_msgs::HumanPoseKeypoint();// geometry_msgs::Point(); for (auto kp : r.keypoints) { 
p.x = kp.x; p.y = kp.y; + p.score = kp.score; hp.keypoints.push_back(p); } human_pose_msg_ptr_->humanposes.push_back(hp); diff --git a/vino_people_msgs/CMakeLists.txt b/vino_people_msgs/CMakeLists.txt index cf577f92..42d40f5d 100644 --- a/vino_people_msgs/CMakeLists.txt +++ b/vino_people_msgs/CMakeLists.txt @@ -36,6 +36,7 @@ add_message_files(DIRECTORY msg FILES ObjectsInMasks.msg Reidentification.msg ReidentificationStamped.msg + HumanPoseKeypoint.msg HumanPose.msg HumanPoseStamped.msg ) diff --git a/vino_people_msgs/msg/HumanPose.msg b/vino_people_msgs/msg/HumanPose.msg index 8ac1d524..cf5fc2ce 100644 --- a/vino_people_msgs/msg/HumanPose.msg +++ b/vino_people_msgs/msg/HumanPose.msg @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -geometry_msgs/Point[] keypoints # Keypoints +vino_people_msgs/HumanPoseKeypoint[] keypoints # Keypoints float32 score # score -sensor_msgs/RegionOfInterest roi # region of interest for a person's face \ No newline at end of file +sensor_msgs/RegionOfInterest roi # region of interest for a person's body \ No newline at end of file diff --git a/vino_people_msgs/msg/HumanPoseKeypoint.msg b/vino_people_msgs/msg/HumanPoseKeypoint.msg new file mode 100644 index 00000000..c0acbf43 --- /dev/null +++ b/vino_people_msgs/msg/HumanPoseKeypoint.msg @@ -0,0 +1,3 @@ +float64 x +float64 y +float64 score \ No newline at end of file diff --git a/vino_sample/src/image_human_pose_client.cpp b/vino_sample/src/image_human_pose_client.cpp index 42c136cc..88785679 100644 --- a/vino_sample/src/image_human_pose_client.cpp +++ b/vino_sample/src/image_human_pose_client.cpp @@ -48,7 +48,7 @@ int main(int argc, char ** argv) for (auto kp : srv.response.humanposes[i].keypoints) { - os << std::endl<< "\tx: " << kp.x << "\ty: " << kp.y << "\tz: " << kp.z; + os << std::endl<< "\tx: " << kp.x << "\ty: " << kp.y << "\tscore: " << kp.score; } ROS_INFO(os.str().c_str()); } From 
e8bb2bc7f36a8bcfce967ce924771a0888678218 Mon Sep 17 00:00:00 2001 From: Silas Alves Date: Tue, 21 Apr 2020 18:59:11 -0400 Subject: [PATCH 11/11] Fix ROS message. --- skel-track-notes.md | 26 ++++++++++++++++++- .../src/outputs/ros_topic_output.cpp | 15 ++++++++--- vino_people_msgs/msg/HumanPoseKeypoint.msg | 5 ++-- vino_sample/src/image_human_pose_client.cpp | 2 +- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/skel-track-notes.md b/skel-track-notes.md index f68e174d..46a1b036 100644 --- a/skel-track-notes.md +++ b/skel-track-notes.md @@ -7,4 +7,28 @@ ```bash $ cd /opt/openvino_toolkit/open_model_zoo/tools/downloader $ python3 downloader.py --name human-pose-estimation-0001 -``` \ No newline at end of file +``` + +## Keypoints + +| Id | Keypoint Name | +| -- | -- | +| 0 | nose | +| 1 | neck | +| 2 | right shoulder | +| 3 | right elbow | +| 4 | right wrist | +| 5 | left shoulder | +| 6 | left elbow | +| 7 | left wrist | +| 8 | right hip | +| 9 | right knee | +| 10 | right ankle | +| 11 | left hip | +| 12 | left knee | +| 13 | left ankle | +| 14 | right eye | +| 15 | left eye | +| 16 | right ear | +| 17 | left ear | + diff --git a/vino_core_lib/src/outputs/ros_topic_output.cpp b/vino_core_lib/src/outputs/ros_topic_output.cpp index 17f74726..3ef55bc3 100644 --- a/vino_core_lib/src/outputs/ros_topic_output.cpp +++ b/vino_core_lib/src/outputs/ros_topic_output.cpp @@ -222,11 +222,20 @@ void Outputs::RosTopicOutput::accept( hp.score = r.getScore(); auto p = vino_people_msgs::HumanPoseKeypoint();// geometry_msgs::Point(); + p.position.z = -1; for (auto kp : r.keypoints) { - p.x = kp.x; - p.y = kp.y; - p.score = kp.score; + p.position.x = kp.x; + p.position.y = kp.y; + if (kp.x >= 0) + { + p.score = kp.score; + } + else + { + p.score = 0; + } + hp.keypoints.push_back(p); } human_pose_msg_ptr_->humanposes.push_back(hp); diff --git a/vino_people_msgs/msg/HumanPoseKeypoint.msg b/vino_people_msgs/msg/HumanPoseKeypoint.msg index c0acbf43..a13caa02 100644 --- 
a/vino_people_msgs/msg/HumanPoseKeypoint.msg +++ b/vino_people_msgs/msg/HumanPoseKeypoint.msg @@ -1,3 +1,2 @@ -float64 x -float64 y -float64 score \ No newline at end of file +float64 score +geometry_msgs/Point position \ No newline at end of file diff --git a/vino_sample/src/image_human_pose_client.cpp b/vino_sample/src/image_human_pose_client.cpp index 88785679..dc2f6c88 100644 --- a/vino_sample/src/image_human_pose_client.cpp +++ b/vino_sample/src/image_human_pose_client.cpp @@ -48,7 +48,7 @@ int main(int argc, char ** argv) for (auto kp : srv.response.humanposes[i].keypoints) { - os << std::endl<< "\tx: " << kp.x << "\ty: " << kp.y << "\tscore: " << kp.score; + os << std::endl<< "\tx: " << kp.position.x << "\ty: " << kp.position.y << "\tscore: " << kp.score; } ROS_INFO(os.str().c_str()); }