diff --git a/data/images/human_pose_estimation.png b/data/images/human_pose_estimation.png new file mode 100644 index 00000000..ed65d5f7 Binary files /dev/null and b/data/images/human_pose_estimation.png differ diff --git a/dynamic_vino_lib/CMakeLists.txt b/dynamic_vino_lib/CMakeLists.txt index a38912e6..c84d8885 100644 --- a/dynamic_vino_lib/CMakeLists.txt +++ b/dynamic_vino_lib/CMakeLists.txt @@ -168,6 +168,8 @@ add_library(${PROJECT_NAME} SHARED src/inferences/landmarks_detection.cpp src/inferences/vehicle_attribs_detection.cpp src/inferences/license_plate_detection.cpp + src/inferences/peak.cpp + src/inferences/human_pose_estimation.cpp src/inputs/realsense_camera.cpp src/inputs/image_topic.cpp src/inputs/standard_camera.cpp @@ -180,6 +182,7 @@ add_library(${PROJECT_NAME} SHARED src/models/face_detection_model.cpp src/models/head_pose_detection_model.cpp src/models/object_detection_ssd_model.cpp + src/models/human_pose_estimation_model.cpp # src/models/object_detection_yolov2voc_model.cpp src/models/object_segmentation_model.cpp src/models/person_reidentification_model.cpp diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/inferences/base_inference.h b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/base_inference.h index 0fa21adf..dbaadd95 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/inferences/base_inference.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/base_inference.h @@ -87,6 +87,7 @@ class Result { return location_; } + inline void setLocation(const cv::Rect& location) { location_ = location; } private: cv::Rect location_; diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/inferences/human_pose_estimation.h b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/human_pose_estimation.h new file mode 100644 index 00000000..fa3a4a5a --- /dev/null +++ b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/human_pose_estimation.h @@ -0,0 +1,214 @@ +// TODO add license + +/** + * @brief A header file with declaration for AgeGenderDetection Class + * @file human_pose_estimation.h + */ +#ifndef DYNAMIC_VINO_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H +#define DYNAMIC_VINO_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H + +#include +#include +#include + +#include "dynamic_vino_lib/engines/engine.h" +#include "dynamic_vino_lib/inferences/base_inference.h" +#include "dynamic_vino_lib/models/human_pose_estimation_model.h" +#include "inference_engine.hpp" +#include "opencv2/opencv.hpp" + +namespace dynamic_vino_lib +{ + +class HumanPoseKeypoint : public cv::Point2f +{ +public: + HumanPoseKeypoint(float x, float y, float score) + : HumanPoseKeypoint::Point_(x, y), + score(score) {}; + HumanPoseKeypoint(cv::Point2f point) + : HumanPoseKeypoint::Point_(point), + score(score) {}; + float score; +}; + +/** + * @class HumanPoseResult + * @brief Class for storing and processing age and gender detection result. + */ +class HumanPoseResult : public Result +{ + public: + friend class HumanPoseEstimation; + explicit HumanPoseResult(const cv::Rect& location); + explicit HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, // = std::vector(), + const float& score); // = 0); + + // Following similar structure of dynamic_vino_lib/inferences/object_detection.h + // and human_pose_estimation_demo/src/human_pose_estimator.h + + /** + * @brief Get the age keypoints of the estimated pose from the result. + * @return The estimated keypoints. + */ + std::vector getKeypoints() const + { + return keypoints; + } + + /** + * @brief Get the score of the estimated pose from the result. + * @return The score of the estimation. + */ + float getScore() const + { + return score; + } + + std::vector keypointsScores; + std::vector keypoints; + float score = -1; +}; + + +/** + * @class HumanPoseEstimation + * @brief Class to load the human pose estimation model and perform + human pose estimation. + */ +class HumanPoseEstimation : public BaseInference +{ + public: + using Result = dynamic_vino_lib::HumanPoseResult; + HumanPoseEstimation(); + ~HumanPoseEstimation() override; + /** + * @brief Load the age gender detection model. + */ + void loadNetwork(std::shared_ptr); + /** + * @brief Enqueue a frame to this class. + * The frame will be buffered but not inferred yet. + * @param[in] frame The frame to be enqueued. + * @param[in] input_frame_loc The location of the enqueued frame with respect + * to the frame generated by the input device. + * @return Whether this operation is successful. + */ + bool enqueue(const cv::Mat& frame, const cv::Rect&) override; + /** + * @brief Start inference for all buffered frames. + * @return Whether this operation is successful. + */ + bool submitRequest() override; + /** + * @brief This function will fetch the results of the previous inference and + * stores the results in a result buffer array. All buffered frames will be + * cleared. + * @return Whether the Inference object fetches a result this time + */ + bool fetchResults() override; + /** + * @brief Get the length of the buffer result array. + * @return The length of the buffer result array. + */ + int getResultsLength() const override; + /** + * @brief Get the location of result with respect + * to the frame generated by the input device. + * @param[in] idx The index of the result. + */ + const dynamic_vino_lib::Result* getLocationResult(int idx) const override; + /** + * @brief Get the name of the Inference instance. + * @return The name of the Inference instance. + */ + const std::string getName() const override; + /** + * @brief Show the observed detection result either through image window + * or ROS topic. + */ + void observeOutput(const std::shared_ptr& output) override; + + const std::vector getFilteredROIs(const std::string filter_conditions) const override; + + /** + * @brief Processess the network's outputs to extract the valid poses. + * + * Copied from: https://github.com/opencv/open_model_zoo/blob/master/demos/human_pose_estimation_demo/src/human_pose_estimator.cpp + * + * @param heatMapsData Data outputted by the heatmap network. + * @param heatMapOffset Size of each heatmap result. + * @param nHeatMaps Number of keypoints. + * @param pafsData Data outputted by the PAF network. + * @param pafOffset Size of each PAF result. + * @param nPafs Numver of PAFs. + * @param featureMapWidth Width of the heatmap. + * @param featureMapHeight Height of the heatmap. + * @param imageSize Size of the input image. + * @return std::vector A vector with the detected poses. + */ + std::vector postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const; + + private: + + /** + * @brief Resizes the heatmap by upSampleRatio. + * + * @param featureMaps A vector with the heatmaps to resize. + */ + void resizeFeatureMaps(std::vector& featureMaps) const; + + /** + * @brief Extracts the poses from the given heatmaps and PAFs. + * + * @param heatMaps Postprocessed heatmaps. + * @param pafs Postprocessed PAFs. + * @return std::vector The detected poses. + */ + std::vector extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const; + + /** + * @brief Aligns the poses' keypoints to the input image. + * + * @param poses Poses (extracted from the heatmaps and PAFs). + * @param featureMapsSize The size of the heatmaps. + * @param imageSize The input image size. + */ + void correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const; + + /** + * @brief Correct the bonding boxes based on the poses' keypoints. + * + * @param poses Poses (with corrected keypoints). + */ + void correctROI(std::vector& poses) const; + + std::shared_ptr valid_model_; + std::vector results_; + int width_ = 0; + int height_ = 0; + + const size_t keypointsNumber_ = 18; + int upsampleRatio_ = 4; + float minPeaksDistance_ = 3.0; + float midPointsScoreThreshold_ = 0.05; + float foundMidPointsRatioThreshold_ = 0.8; + int minJointsNumber_ = 3; + float minSubsetScore_ = 0.2; + int stride_ = 8; + cv::Vec4i pad_ = cv::Vec4i::all(0); +}; + +} // namespace dynamic_vino_lib + +#endif // DYNAMIC_VINO_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H \ No newline at end of file diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/inferences/peak.h b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/peak.h new file mode 100644 index 00000000..ccb91378 --- /dev/null +++ b/dynamic_vino_lib/include/dynamic_vino_lib/inferences/peak.h @@ -0,0 +1,87 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +// #include "human_pose.hpp" +#include "dynamic_vino_lib/inferences/human_pose_estimation.h" + +namespace human_pose_estimation +{ + +using Result = dynamic_vino_lib::HumanPoseResult; + +struct Peak +{ + Peak(const int id = -1, + const cv::Point2f &pos = cv::Point2f(), + const float score = 0.0f); + + int id; + cv::Point2f pos; + float score; +}; + +struct HumanPoseByPeaksIndices +{ + explicit HumanPoseByPeaksIndices(const int keypointsNumber); + + std::vector peaksIndices; + int nJoints; + float score; +}; + +struct TwoJointsConnection +{ + TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score); + + int firstJointIdx; + int secondJointIdx; + float score; +}; + +void findPeaks(const std::vector &heatMaps, + const float minPeaksDistance, + std::vector> &allPeaks, + int heatMapId); + +std::vector groupPeaksToPoses( + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); + +class FindPeaksBody : public cv::ParallelLoopBody +{ +public: + FindPeaksBody(const std::vector &heatMaps, float minPeaksDistance, + std::vector> &peaksFromHeatMap) + : heatMaps(heatMaps), + minPeaksDistance(minPeaksDistance), + peaksFromHeatMap(peaksFromHeatMap) {} + + virtual void operator()(const cv::Range &range) const + { + for (int i = range.start; i < range.end; i++) + { + findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i); + } + } + +private: + const std::vector &heatMaps; + float minPeaksDistance; + std::vector> &peaksFromHeatMap; +}; + +} // namespace human_pose_estimation \ No newline at end of file diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/models/human_pose_estimation_model.h b/dynamic_vino_lib/include/dynamic_vino_lib/models/human_pose_estimation_model.h new file mode 100644 index 00000000..64f335a3 --- /dev/null +++ b/dynamic_vino_lib/include/dynamic_vino_lib/models/human_pose_estimation_model.h @@ -0,0 +1,69 @@ +// TODO: Add license + +/** + * @brief A header file with declaration for FaceDetectionModel Class + * @file human_pose_estimation_model.h + */ + +#ifndef DYNAMIC_VINO_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H +#define DYNAMIC_VINO_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H + +#include +#include "dynamic_vino_lib/models/base_model.h" + +namespace Models +{ +/** + * @class HumanPoseEstimationModel + * @brief This class generates the human pose estimation model. + */ +class HumanPoseEstimationModel : public BaseModel +{ +public: + HumanPoseEstimationModel(const std::string& model_loc, int batch_size = 1); + + /** + * @brief Get the input name. + * @return Input name. + */ + inline const std::string getInputName() const + { + return input_; + } + /** + * @brief Get the age from the detection reuslt. + * @return Detected age. + */ + inline const std::string getOutputKeypointsName() const + { + return output_keypoints_; + } + /** + * @brief Get the gender from the detection reuslt. + * @return Detected gender. + */ + inline const std::string getOutputHeatmapName() const + { + return output_heatmap_; + } + /** + * @brief Get the name of this detection model. + * @return Name of the model. + */ + // const std::string getModelName() const override; + const std::string getModelCategory() const override; + bool updateLayerProperty(InferenceEngine::CNNNetReader::Ptr) override; + +protected: + // void checkLayerProperty(const InferenceEngine::CNNNetReader::Ptr&) override; + // void setLayerProperty(InferenceEngine::CNNNetReader::Ptr) override; + + private: + std::string input_; + std::string output_keypoints_; + std::string output_heatmap_; +}; + +} // namespace Models + +#endif // DYNAMIC_VINO_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H \ No newline at end of file diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/base_output.h b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/base_output.h index 9a324ff3..3245aec3 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/base_output.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/base_output.h @@ -30,6 +30,8 @@ #include "dynamic_vino_lib/inferences/emotions_detection.h" #include "dynamic_vino_lib/inferences/face_detection.h" #include "dynamic_vino_lib/inferences/head_pose_detection.h" +#include "dynamic_vino_lib/inferences/peak.h" +#include "dynamic_vino_lib/inferences/human_pose_estimation.h" #include "dynamic_vino_lib/inferences/object_segmentation.h" #include "dynamic_vino_lib/inferences/person_reidentification.h" #include "dynamic_vino_lib/inferences/landmarks_detection.h" @@ -47,6 +49,7 @@ #include #include #include +#include class Pipeline; namespace Outputs @@ -127,6 +130,12 @@ class BaseOutput virtual void accept(const std::vector&) { } + /** + * @brief Generate output content according to the human pose estimation result. + */ + virtual void accept(const std::vector &) + { + } /** * @brief Generate output content according to the object segmentation result. */ @@ -167,6 +176,9 @@ class BaseOutput virtual void setServiceResponse(boost::shared_ptr response) { } + virtual void setServiceResponse(boost::shared_ptr response) + { + } virtual void setServiceResponse(boost::shared_ptr response) { } diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/image_window_output.h b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/image_window_output.h index 5e9faba7..4d564701 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/image_window_output.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/image_window_output.h @@ -101,6 +101,12 @@ class ImageWindowOutput : public BaseOutput * @param[in] A head pose detection result objetc. */ void accept(const std::vector&) override; + /** + * @brief Generate image window output content according to + * the human pose estimation result. + * @param[in] An human pose estimation result objetc. + */ + void accept(const std::vector &) override; /** * @brief Generate image window output content according to * the headpose detection result. @@ -161,6 +167,7 @@ class ImageWindowOutput : public BaseOutput cv::Point pa_top; // for person attributes, top position cv::Point pa_bottom; // for person attributes, bottom position std::vector landmarks; + std::vector kp; // for humanpose, keypoints }; std::vector outputs_; diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_service_output.h b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_service_output.h index cb0c0de7..52329e44 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_service_output.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_service_output.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -42,6 +44,7 @@ #include #include #include +#include #include @@ -79,6 +82,7 @@ class RosServiceOutput : public RosTopicOutput void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); + void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_topic_output.h b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_topic_output.h index 2f57fd77..560d9f1d 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_topic_output.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/ros_topic_output.h @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include #include @@ -140,6 +142,12 @@ class RosTopicOutput : public BaseOutput * @param[in] An head pose detection result objetc. */ void accept(const std::vector&) override; + /** + * @brief Generate ros topic infomation according to + * the human pose estimation result. + * @param[in] An human pose estimation result objetc. + */ + void accept(const std::vector&) override; /** * @brief Generate ros topic infomation according to * the headpose detection result. @@ -163,6 +171,8 @@ class RosTopicOutput : public BaseOutput std::shared_ptr age_gender_topic_; ros::Publisher pub_headpose_; std::shared_ptr headpose_topic_; + ros::Publisher pub_human_pose_; + std::shared_ptr human_pose_topic_; ros::Publisher pub_object_; std::shared_ptr detected_objects_topic_; ros::Publisher pub_person_reid_; diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/rviz_output.h b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/rviz_output.h index 3b62186e..9744d29e 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/outputs/rviz_output.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/outputs/rviz_output.h @@ -95,6 +95,12 @@ class RvizOutput : public BaseOutput * @param[in] A head pose detection result objetc. */ void accept(const std::vector&) override; + /** + * @brief Generate rviz output content according to + * the human pose estimation result. + * @param[in] An object segmentation result objetc. + */ + void accept(const std::vector &) override; /** * @brief Generate rviz output content according to * the headpose detection result. diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_manager.h b/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_manager.h index d131c530..57fcf2b5 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_manager.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_manager.h @@ -103,6 +103,8 @@ class PipelineManager createEmotionRecognition(const Params::ParamManager::InferenceRawData& infer); std::shared_ptr createHeadPoseEstimation(const Params::ParamManager::InferenceRawData& infer); + std::shared_ptr + createHumanPoseEstimation(const Params::ParamManager::InferenceRawData& infer); std::shared_ptr createObjectDetection(const Params::ParamManager::InferenceRawData& infer); std::shared_ptr diff --git a/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_params.h b/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_params.h index f1480d06..accdca6a 100644 --- a/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_params.h +++ b/dynamic_vino_lib/include/dynamic_vino_lib/pipeline_params.h @@ -53,6 +53,7 @@ const char kInferTpye_FaceDetection[] = "FaceDetection"; const char kInferTpye_AgeGenderRecognition[] = "AgeGenderRecognition"; const char kInferTpye_EmotionRecognition[] = "EmotionRecognition"; const char kInferTpye_HeadPoseEstimation[] = "HeadPoseEstimation"; +const char kInferTpye_HumanPoseEstimation[] = "HumanPoseEstimation"; const char kInferTpye_ObjectDetection[] = "ObjectDetection"; const char kInferTpye_ObjectSegmentation[] = "ObjectSegmentation"; const char kInferTpye_PersonReidentification[] = "PersonReidentification"; diff --git a/dynamic_vino_lib/src/inferences/human_pose_estimation.cpp b/dynamic_vino_lib/src/inferences/human_pose_estimation.cpp new file mode 100644 index 00000000..3f76bbd4 --- /dev/null +++ b/dynamic_vino_lib/src/inferences/human_pose_estimation.cpp @@ -0,0 +1,263 @@ +// TODO add license + +/** + * @brief a header file with declaration of HumanPoseResult class //TODO update + * @file human_pose_estimation.cpp + * + * This file was mostly based on age_gender_detection.cpp. It also uses the code + * from object_detection.cpp. + */ + +#include +#include + +#include "dynamic_vino_lib/inferences/human_pose_estimation.h" +#include "dynamic_vino_lib/outputs/base_output.h" +#include "dynamic_vino_lib/inferences/peak.h" + +// HumanPoseResult +dynamic_vino_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) + : Result(location) +{ +} + +dynamic_vino_lib::HumanPoseResult::HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, + const float& score) + : keypoints(keypoints), + score(score), + Result(location) +{ +} + +dynamic_vino_lib::HumanPoseEstimation::HumanPoseEstimation() : dynamic_vino_lib::BaseInference() +{ +} + +dynamic_vino_lib::HumanPoseEstimation::~HumanPoseEstimation() = default; + +void dynamic_vino_lib::HumanPoseEstimation::loadNetwork( + std::shared_ptr network) +{ + valid_model_ = network; + setMaxBatchSize(network->getMaxBatchSize()); +} + +bool dynamic_vino_lib::HumanPoseEstimation::enqueue( + const cv::Mat& frame, const cv::Rect& input_frame_loc) +{ + // object_detection.cpp + if (width_ == 0 && height_ == 0) + { + width_ = frame.cols; + height_ = frame.rows; + } + + if (!dynamic_vino_lib::BaseInference::enqueue( + frame, input_frame_loc, 1, 0, valid_model_->getInputName())) + { + return false; + } + Result r(input_frame_loc); + results_.clear(); + results_.emplace_back(r); + return true; +} + +bool dynamic_vino_lib::HumanPoseEstimation::submitRequest() +{ + return dynamic_vino_lib::BaseInference::submitRequest(); +} + +bool dynamic_vino_lib::HumanPoseEstimation::fetchResults() +{ + bool can_fetch = dynamic_vino_lib::BaseInference::fetchResults(); + if (!can_fetch) return false; + auto request = getEngine()->getRequest(); + InferenceEngine::Blob::Ptr keypointsBlob = + request->GetBlob(valid_model_->getOutputKeypointsName()); + InferenceEngine::Blob::Ptr heatmapBlob = + request->GetBlob(valid_model_->getOutputHeatmapName()); + + results_.clear(); + CV_Assert(heatmapBlob->getTensorDesc().getDims()[1] == keypointsNumber_ + 1); + InferenceEngine::SizeVector heatMapDims = + heatmapBlob->getTensorDesc().getDims(); + //std::vector poses = postprocess( + results_ = postprocess( + heatmapBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsNumber_, + keypointsBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsBlob->getTensorDesc().getDims()[1], + heatMapDims[3], heatMapDims[2], cv::Size(width_, height_)); + return true; +} + +int dynamic_vino_lib::HumanPoseEstimation::getResultsLength() const +{ + return static_cast(results_.size()); +} + +const dynamic_vino_lib::Result* +dynamic_vino_lib::HumanPoseEstimation::getLocationResult(int idx) const +{ + return &(results_[idx]); +} + +const std::string dynamic_vino_lib::HumanPoseEstimation::getName() const +{ + return valid_model_->getModelName(); +} + +void dynamic_vino_lib::HumanPoseEstimation::observeOutput( + const std::shared_ptr& output) +{ + if (output != nullptr) + { + output->accept(results_); + } +} + +const std::vector +dynamic_vino_lib::HumanPoseEstimation::getFilteredROIs(const std::string filter_conditions) const +{ + if (!filter_conditions.empty()) + { + slog::err << "Headpose detection does not support filtering now! " + << "Filter conditions: " << filter_conditions << slog::endl; + } + std::vector filtered_rois; + for (auto res : results_) + { + filtered_rois.push_back(res.getLocation()); + } + return filtered_rois; +} + +using Result = dynamic_vino_lib::HumanPoseResult; + +std::vector dynamic_vino_lib::HumanPoseEstimation::postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const +{ + std::vector heatMaps(nHeatMaps); + for (size_t i = 0; i < heatMaps.size(); i++) + { + heatMaps[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + heatMapsData + i * heatMapOffset))); + } + + resizeFeatureMaps(heatMaps); + + std::vector pafs(nPafs); + for (size_t i = 0; i < pafs.size(); i++) + { + pafs[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + pafsData + i * pafOffset))); + } + resizeFeatureMaps(pafs); + + std::vector poses = extractPoses(heatMaps, pafs); + correctCoordinates(poses, heatMaps[0].size(), imageSize); + correctROI(poses); + return poses; +} + +std::vector dynamic_vino_lib::HumanPoseEstimation::extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const +{ + std::vector> peaksFromHeatMap(heatMaps.size()); + human_pose_estimation::FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance_, peaksFromHeatMap); + cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), findPeaksBody); + int peaksBefore = 0; + for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) + { + peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); + for (auto& peak : peaksFromHeatMap[heatmapId]) + { + peak.id += peaksBefore; + } + } + std::vector poses = groupPeaksToPoses( + peaksFromHeatMap, pafs, keypointsNumber_, midPointsScoreThreshold_, + foundMidPointsRatioThreshold_, minJointsNumber_, minSubsetScore_); + return poses; +} + +void dynamic_vino_lib::HumanPoseEstimation::resizeFeatureMaps( + std::vector& featureMaps) const +{ + for (auto& featureMap : featureMaps) + { + cv::resize(featureMap, featureMap, cv::Size(), + upsampleRatio_, upsampleRatio_, cv::INTER_CUBIC); + } +} + +void dynamic_vino_lib::HumanPoseEstimation::correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const +{ + CV_Assert(stride_ % upsampleRatio_ == 0); + + cv::Size fullFeatureMapSize = featureMapsSize * stride_ / upsampleRatio_; + + float scaleX = imageSize.width / + static_cast(fullFeatureMapSize.width - pad_(1) - pad_(3)); + float scaleY = imageSize.height / + static_cast(fullFeatureMapSize.height - pad_(0) - pad_(2)); + for (auto& pose : poses) + { + for (auto& keypoint : pose.keypoints) + { + if (keypoint != cv::Point2f(-1, -1)) + { + keypoint.x *= stride_ / upsampleRatio_; + keypoint.x -= pad_(1); + keypoint.x *= scaleX; + + keypoint.y *= stride_ / upsampleRatio_; + keypoint.y -= pad_(0); + keypoint.y *= scaleY; + } + } + } +} + +void dynamic_vino_lib::HumanPoseEstimation::correctROI( + std::vector& poses) const +{ + for (auto& pose : poses) + { + int xMin = width_; + int xMax = 0; + int yMin = height_; + int yMax = 0; + for (auto& kp: pose.keypoints) + { + if (kp.x < 0) continue; + + int x = static_cast(kp.x); + int y = static_cast(kp.y); + + if (x > xMax) xMax = x; + if (x < xMin) xMin = x; + + if (y > yMax) yMax = y; + if (y < yMin) yMin = y; + } + // slog::info << "rect at: (" << xMin << ", " << yMin << "), (" << xMax << ", " << yMax << ")" << slog::endl; + cv::Rect newLocation = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin); + pose.setLocation(newLocation); + } +} \ No newline at end of file diff --git a/dynamic_vino_lib/src/inferences/peak.cpp b/dynamic_vino_lib/src/inferences/peak.cpp new file mode 100644 index 00000000..b40bcfb0 --- /dev/null +++ b/dynamic_vino_lib/src/inferences/peak.cpp @@ -0,0 +1,370 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "dynamic_vino_lib/inferences/peak.h" + +namespace human_pose_estimation +{ +Peak::Peak(const int id, const cv::Point2f &pos, const float score) + : id(id), + pos(pos), + score(score) {} + +HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber) + : peaksIndices(std::vector(keypointsNumber, -1)), + nJoints(0), + score(0.0f) {} + +TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score) + : firstJointIdx(firstJointIdx), + secondJointIdx(secondJointIdx), + score(score) {} + +void findPeaks(const std::vector &heatMaps, + const float minPeaksDistance, + std::vector> &allPeaks, + int heatMapId) +{ + const float threshold = 0.1f; + std::vector peaks; + const cv::Mat &heatMap = heatMaps[heatMapId]; + const float *heatMapData = heatMap.ptr(); + size_t heatMapStep = heatMap.step1(); + for (int y = -1; y < heatMap.rows + 1; y++) + { + for (int x = -1; x < heatMap.cols + 1; x++) + { + float val = 0; + if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) + { + val = heatMapData[y * heatMapStep + x]; + val = val >= threshold ? val : 0; + } + + float left_val = 0; + if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) + { + left_val = heatMapData[y * heatMapStep + x + 1]; + left_val = left_val >= threshold ? left_val : 0; + } + + float right_val = 0; + if (x > 0 && y >= 0 && y < heatMap.rows) + { + right_val = heatMapData[y * heatMapStep + x - 1]; + right_val = right_val >= threshold ? right_val : 0; + } + + float top_val = 0; + if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) + { + top_val = heatMapData[(y + 1) * heatMapStep + x]; + top_val = top_val >= threshold ? top_val : 0; + } + + float bottom_val = 0; + if (x >= 0 && y > 0 && x < heatMap.cols) + { + bottom_val = heatMapData[(y - 1) * heatMapStep + x]; + bottom_val = bottom_val >= threshold ? bottom_val : 0; + } + + if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) + { + peaks.push_back(cv::Point(x, y)); + } + } + } + std::sort(peaks.begin(), peaks.end(), [](const cv::Point &a, const cv::Point &b) { + return a.x < b.x; + }); + std::vector isActualPeak(peaks.size(), true); + int peakCounter = 0; + std::vector &peaksWithScoreAndID = allPeaks[heatMapId]; + for (size_t i = 0; i < peaks.size(); i++) + { + if (isActualPeak[i]) + { + for (size_t j = i + 1; j < peaks.size(); j++) + { + if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + + (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) + { + isActualPeak[j] = false; + } + } + peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at(peaks[i]))); + } + } +} + +std::vector groupPeaksToPoses( + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore) +{ + const std::vector> limbIdsHeatmap = { + {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, + {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, {2, 1}, + {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18}}; + const std::vector> limbIdsPaf = { + {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, + {21, 22}, {23, 24}, {25, 26}, {27, 28}, {29, 30}, {47, 48}, {49, 50}, + {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46}}; + + std::vector candidates; + for (const auto &peaks : allPeaks) + { + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + } + std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t k = 0; k < limbIdsPaf.size(); k++) + { + std::vector connections; + const int mapIdxOffset = keypointsNumber + 1; + std::pair scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset]}; + const int idxJointA = limbIdsHeatmap[k].first - 1; + const int idxJointB = limbIdsHeatmap[k].second - 1; + const std::vector &candA = allPeaks[idxJointA]; + const std::vector &candB = allPeaks[idxJointB]; + const size_t nJointsA = candA.size(); + const size_t nJointsB = candB.size(); + if (nJointsA == 0 && nJointsB == 0) + { + continue; + } + else if (nJointsA == 0) + { + for (size_t i = 0; i < nJointsB; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointB] == candB[i].id) + { + num++; + continue; + } + } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointB] = candB[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candB[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + else if (nJointsB == 0) + { + for (size_t i = 0; i < nJointsA; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == candA[i].id) + { + num++; + continue; + } + } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointA] = candA[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candA[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + + std::vector tempJointConnections; + for (size_t i = 0; i < nJointsA; i++) + { + for (size_t j = 0; j < nJointsB; j++) + { + cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5; + cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y)); + cv::Point2f vec = candB[j].pos - candA[i].pos; + double norm_vec = cv::norm(vec); + if (norm_vec == 0) + { + continue; + } + vec /= norm_vec; + float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); + int height_n = pafs[0].rows / 2; + float suc_ratio = 0.0f; + float mid_score = 0.0f; + const int mid_num = 10; + const float scoreThreshold = -100.0f; + if (score > scoreThreshold) + { + float p_sum = 0; + int p_count = 0; + cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1), + (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1)); + for (int n = 0; n < mid_num; n++) + { + cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), + cvRound(candA[i].pos.y + n * step.height)); + cv::Point2f pred(scoreMid.first.at(midPoint), + scoreMid.second.at(midPoint)); + score = vec.x * pred.x + vec.y * pred.y; + if (score > midPointsScoreThreshold) + { + p_sum += score; + p_count++; + } + } + suc_ratio = static_cast(p_count / mid_num); + float ratio = p_count > 0 ? p_sum / p_count : 0.0f; + mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); + } + if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) + { + tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); + } + } + } + if (!tempJointConnections.empty()) + { + std::sort(tempJointConnections.begin(), tempJointConnections.end(), + [](const TwoJointsConnection &a, + const TwoJointsConnection &b) { + return (a.score > b.score); + }); + } + size_t num_limbs = std::min(nJointsA, nJointsB); + size_t cnt = 0; + std::vector occurA(nJointsA, 0); + std::vector occurB(nJointsB, 0); + for (size_t row = 0; row < tempJointConnections.size(); row++) + { + if (cnt == num_limbs) + { + break; + } + const int &indexA = tempJointConnections[row].firstJointIdx; + const int &indexB = tempJointConnections[row].secondJointIdx; + const float &score = tempJointConnections[row].score; + if (occurA[indexA] == 0 && occurB[indexB] == 0) + { + connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); + cnt++; + occurA[indexA] = 1; + occurB[indexB] = 1; + } + } + if (connections.empty()) + { + continue; + } + + bool extraJointConnections = (k == 17 || k == 18); + if (k == 0) + { + subset = std::vector( + connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + subset[i].peaksIndices[idxJointA] = indexA; + subset[i].peaksIndices[idxJointB] = indexB; + subset[i].nJoints = 2; + subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + } + } + else if (extraJointConnections) + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) + { + subset[j].peaksIndices[idxJointB] = indexB; + } + else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) + { + subset[j].peaksIndices[idxJointA] = indexA; + } + } + } + continue; + } + else + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + bool num = false; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA) + { + subset[j].peaksIndices[idxJointB] = indexB; + subset[j].nJoints++; + subset[j].score += candidates[indexB].score + connections[i].score; + num = true; + } + } + if (!num) + { + HumanPoseByPeaksIndices hpWithScore(keypointsNumber); + hpWithScore.peaksIndices[idxJointA] = indexA; + hpWithScore.peaksIndices[idxJointB] = indexB; + hpWithScore.nJoints = 2; + hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + subset.push_back(hpWithScore); + } + } + } + } + std::vector poses; + for (const auto &subsetI : subset) + { + if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) + { + continue; + } + int position = -1; + Result pose(cv::Rect(), + std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + subsetI.score * std::max(0, subsetI.nJoints - 1)); + for (const auto &peakIdx : subsetI.peaksIndices) + { + position++; + if (peakIdx >= 0) + { + pose.keypoints[position] = candidates[peakIdx].pos; + pose.keypoints[position].x += 0.5; + pose.keypoints[position].y += 0.5; + pose.keypoints[position].score = candidates[peakIdx].score; + } + } + poses.push_back(pose); + } + return poses; +} +} // namespace human_pose_estimation \ No newline at end of file diff --git a/dynamic_vino_lib/src/models/human_pose_estimation_model.cpp b/dynamic_vino_lib/src/models/human_pose_estimation_model.cpp new file mode 100644 index 00000000..95cee2f4 --- /dev/null +++ b/dynamic_vino_lib/src/models/human_pose_estimation_model.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "dynamic_vino_lib/models/human_pose_estimation_model.h" +#include "dynamic_vino_lib/slog.h" + +// Validated Human Pose Network +Models::HumanPoseEstimationModel::HumanPoseEstimationModel(const std::string& model_loc, int max_batch_size) + : BaseModel(model_loc, max_batch_size) +{ +} + +bool Models::HumanPoseEstimationModel::updateLayerProperty(InferenceEngine::CNNNetReader::Ptr net_reader) +{ + slog::info << "Checking INPUTs for model " << getModelName() << slog::endl; + // set input property + InferenceEngine::InputsDataMap input_info_map(net_reader->getNetwork().getInputsInfo()); + if (input_info_map.size() != 1) + { + slog::warn << "This model should have only one input, but we got" << std::to_string(input_info_map.size()) + << "inputs" << slog::endl; + return false; + } + InferenceEngine::InputInfo::Ptr input_info = input_info_map.begin()->second; + input_info->setPrecision(InferenceEngine::Precision::U8); + input_info->getInputData()->setLayout(InferenceEngine::Layout::NCHW); + addInputInfo("input", input_info_map.begin()->first); + + InferenceEngine::OutputsDataMap output_info_map(net_reader->getNetwork().getOutputsInfo()); + + auto it = output_info_map.begin(); + InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; + InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + keypoints_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + keypoints_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + heatmap_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + heatmap_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + // set input and output layer name + input_ = input_info_map.begin()->first; + + output_keypoints_ = keypoints_output_ptr->getName(); + output_heatmap_ = heatmap_output_ptr->getName(); +// addOutputInfo("output_keypoints_", keypoints_output_ptr->getName()); +// addOutputInfo("output_heatmap_", keypoints_output_ptr->getName()); + +// it = output_info_map.begin(); +// InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; +// InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + + if (keypoints_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation network, PAF layer (" + + keypoints_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + keypoints_output_ptr->getCreatorLayer().lock()->type); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation network, heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + heatmap_output_ptr->getCreatorLayer().lock()->type); + } + + if (keypoints_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + keypoints_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(keypoints_output_ptr->getCreatorLayer().lock()->outData.size())); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(heatmap_output_ptr->getCreatorLayer().lock()->outData.size())); + } + + if (keypoints_output_ptr->getCreatorLayer().lock()->outData[0]->getDims().size() == 4 && + keypoints_output_ptr->getCreatorLayer().lock()->outData[0]->getDims()[2] == 19) + { + std::swap(keypoints_output_ptr, heatmap_output_ptr); + } + + auto pafDims = keypoints_output_ptr->getCreatorLayer().lock()->outData[0]->getDims(); + auto heatDims = heatmap_output_ptr->getCreatorLayer().lock()->outData[0]->getDims(); + + // if (pafDims.size() != 4 || pafDims[0] != 57 || pafDims[1] != 32 + // || pafDims[2] != 38 || pafDims[3] != 1) + if (pafDims.size() != 4 || pafDims[0] != 1 || pafDims[1] != 38 + || pafDims[2] != 32 || pafDims[3] != 57) + { + std::ostringstream size; + size << "[ "; + for (size_t s : pafDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + keypoints_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 38 1 ], but had: " + size.str()); + } + + // if (heatDims.size() != 4 || heatDims[0] != 57 || heatDims[1] != 32 + // || heatDims[2] != 19 || heatDims[3] != 1) + if (heatDims.size() != 4 || heatDims[0] != 1 || heatDims[1] != 19 + || heatDims[2] != 32 || heatDims[3] != 57) + { + std::ostringstream size; + size << "[ "; + for (size_t s : heatDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 19 1 ], but had: " + size.str()); + } + + slog::info << "PAF layer: " + << keypoints_output_ptr->getCreatorLayer().lock()->name + << slog::endl; + slog::info << "Heatmap layer: " + << heatmap_output_ptr->getCreatorLayer().lock()->name + << slog::endl; + + printAttribute(); + return true; +} + +const std::string Models::HumanPoseEstimationModel::getModelCategory() const +{ + return "Head Pose Network"; +} \ No newline at end of file diff --git a/dynamic_vino_lib/src/outputs/image_window_output.cpp b/dynamic_vino_lib/src/outputs/image_window_output.cpp index d578de78..1703a919 100644 --- a/dynamic_vino_lib/src/outputs/image_window_output.cpp +++ b/dynamic_vino_lib/src/outputs/image_window_output.cpp @@ -190,6 +190,21 @@ void Outputs::ImageWindowOutput::accept(const std::vector& results) +{ + OutputData output; + output.scalar = cv::Scalar(255, 0, 0); + for (unsigned i = 0; i < results.size(); i++) { + cv::Rect result_rect = results[i].getLocation(); + auto score = std::to_string(results[i].getScore()); + + output.rect = result_rect; + output.desc = "Pose " + std::to_string(i) + " [" + score.substr(0, score.find(".") + 2) + "]"; + output.kp = results[i].getKeypoints(); + outputs_.push_back(output); + } +} + void Outputs::ImageWindowOutput::accept(const std::vector& results) { for (unsigned i = 0; i < results.size(); i++) @@ -321,10 +336,59 @@ void Outputs::ImageWindowOutput::decorateFrame() cv::line(frame_, o.hp_zs, o.hp_ze, cv::Scalar(255, 0, 0), 2); cv::circle(frame_, o.hp_ze, 3, cv::Scalar(255, 0, 0), 2); } + for (auto kp : o.kp) + { + if (kp.x >= 0) + { + std::string score = std::to_string(kp.score).substr(0, 3); + cv::circle(frame_, kp, 3, cv::Scalar(255, 0, 0), cv::FILLED); + cv::putText(frame_, score, kp + cv::Point2f(5, 0), + cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, o.scalar); + } + } + const cv::Point2f absentKeypoint(-1.0f, -1.0f); + const int stickWidth = 4; + static const cv::Scalar colors[18] = { + cv::Scalar(255, 0, 0), cv::Scalar(255, 85, 0), cv::Scalar(255, 170, 0), + cv::Scalar(255, 255, 0), cv::Scalar(170, 255, 0), cv::Scalar(85, 255, 0), + cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 85), cv::Scalar(0, 255, 170), + cv::Scalar(0, 255, 255), cv::Scalar(0, 170, 255), cv::Scalar(0, 85, 255), + cv::Scalar(0, 0, 255), cv::Scalar(85, 0, 255), cv::Scalar(170, 0, 255), + cv::Scalar(255, 0, 255), cv::Scalar(255, 0, 170), cv::Scalar(255, 0, 85) + }; + static const std::pair limbKeypointsIds[] = { + {1, 2}, {1, 5}, {2, 3}, + {3, 4}, {5, 6}, {6, 7}, + {1, 8}, {8, 9}, {9, 10}, + {1, 11}, {11, 12}, {12, 13}, + {1, 0}, {0, 14}, {14, 16}, + {0, 15}, {15, 17} + }; + for (const auto& limbKeypointsId : limbKeypointsIds) + { + std::pair limbKeypoints(o.kp[limbKeypointsId.first], + o.kp[limbKeypointsId.second]); + if (limbKeypoints.first == absentKeypoint || limbKeypoints.second == absentKeypoint) + { + continue; + } - for (int i = 0; i < o.landmarks.size(); i++) + float meanX = (limbKeypoints.first.x + limbKeypoints.second.x) / 2; + float meanY = (limbKeypoints.first.y + limbKeypoints.second.y) / 2; + cv::Point difference = limbKeypoints.first - limbKeypoints.second; + double length = std::sqrt(difference.x * difference.x + difference.y * difference.y); + int angle = static_cast(std::atan2(difference.y, difference.x) * 180 / CV_PI); + std::vector polygon; + cv::ellipse2Poly(cv::Point2d(meanX, meanY), cv::Size2d(length / 2, stickWidth), + angle, 0, 360, 1, polygon); + cv::fillConvexPoly(frame_, polygon, colors[limbKeypointsId.second]); + } + //TODO Corsair-cxs + auto faceBoundingBoxWidth = o.rect.width; + int lmRadius = static_cast(0.01 * faceBoundingBoxWidth + 1); + for (unsigned long i = 0; i < o.landmarks.size(); i++) { - cv::circle(frame_, o.landmarks[i], 3, cv::Scalar(255, 0, 0), 2); + cv::circle(frame_, o.landmarks[i], lmRadius, cv::Scalar(0, 255, 255), -1); } } diff --git a/dynamic_vino_lib/src/outputs/ros_service_output.cpp b/dynamic_vino_lib/src/outputs/ros_service_output.cpp index 7e5a1dcc..3506f830 100644 --- a/dynamic_vino_lib/src/outputs/ros_service_output.cpp +++ b/dynamic_vino_lib/src/outputs/ros_service_output.cpp @@ -74,6 +74,16 @@ void Outputs::RosServiceOutput::setServiceResponse(boost::shared_ptr response) +{ + slog::info << "in Human Pose Estimation service::Response ..."; + if (human_pose_topic_ != nullptr) + { + response->humanposes = human_pose_topic_->humanposes; + } + slog::info << "DONE!" << slog::endl; +} + void Outputs::RosServiceOutput::setServiceResponse(boost::shared_ptr response) { slog::info << "in ObjectsInMasks service::Response ..."; diff --git a/dynamic_vino_lib/src/outputs/ros_topic_output.cpp b/dynamic_vino_lib/src/outputs/ros_topic_output.cpp index b98a99ac..e8642653 100644 --- a/dynamic_vino_lib/src/outputs/ros_topic_output.cpp +++ b/dynamic_vino_lib/src/outputs/ros_topic_output.cpp @@ -32,6 +32,7 @@ Outputs::RosTopicOutput::RosTopicOutput(std::string pipeline_name) : pipeline_na pub_age_gender_ = nh_.advertise("/openvino_toolkit/" + pipeline_name_ + "/age_genders", 16); pub_headpose_ = nh_.advertise("/openvino_toolkit/" + pipeline_name_ + "/headposes", 16); + pub_human_pose_ = nh_.advertise("/openvino_toolkit/" + pipeline_name_ + "/human_poses", 16); pub_object_ = nh_.advertise("/openvino_toolkit/" + pipeline_name_ + "/detected_objects", 16); pub_person_reid_ = nh_.advertise( @@ -61,6 +62,7 @@ Outputs::RosTopicOutput::RosTopicOutput(std::string pipeline_name) : pipeline_na license_plate_topic_ = NULL; vehicle_attribs_topic_ = NULL; landmarks_topic_ = NULL; + human_pose_topic_ = NULL; } void Outputs::RosTopicOutput::feedFrame(const cv::Mat& frame) @@ -262,6 +264,42 @@ void Outputs::RosTopicOutput::accept(const std::vector& results) +{ + human_pose_topic_ = std::make_shared(); + + for (auto r : results) + { + people_msgs::HumanPose hp; + + auto loc = r.getLocation(); + hp.roi.x_offset = loc.x; + hp.roi.y_offset = loc.y; + hp.roi.width = loc.width; + hp.roi.height = loc.height; + hp.score = r.getScore(); + + auto p = people_msgs::HumanPoseKeypoint();// geometry_msgs::Point(); + p.position.z = -1; + for (auto kp : r.keypoints) + { + p.position.x = kp.x; + p.position.y = kp.y; + if (kp.x >= 0) + { + p.score = kp.score; + } + else + { + p.score = 0; + } + + hp.keypoints.push_back(p); + } + human_pose_topic_->humanposes.push_back(hp); + } +} + void Outputs::RosTopicOutput::accept(const std::vector& results) { detected_objects_topic_ = std::make_shared(); @@ -385,6 +423,15 @@ void Outputs::RosTopicOutput::handleOutput() pub_headpose_.publish(headpose_msg); headpose_topic_ = nullptr; } + if (human_pose_topic_ != nullptr) + { + people_msgs::HumanPoseStamped human_pose_msg; + human_pose_msg.header = header; + human_pose_msg.humanposes.swap(human_pose_topic_->humanposes); + + pub_human_pose_.publish(human_pose_msg); + human_pose_topic_ = nullptr; + } if (detected_objects_topic_ != nullptr) { object_msgs::ObjectsInBoxes object_msg; diff --git a/dynamic_vino_lib/src/outputs/rviz_output.cpp b/dynamic_vino_lib/src/outputs/rviz_output.cpp index e04a6076..d8532320 100644 --- a/dynamic_vino_lib/src/outputs/rviz_output.cpp +++ b/dynamic_vino_lib/src/outputs/rviz_output.cpp @@ -81,6 +81,11 @@ void Outputs::RvizOutput::accept(const std::vectoraccept(results); } +void Outputs::RvizOutput::accept(const std::vector& results) +{ + image_window_output_->accept(results); +} + void Outputs::RvizOutput::accept(const std::vector& results) { image_window_output_->accept(results); diff --git a/dynamic_vino_lib/src/pipeline_manager.cpp b/dynamic_vino_lib/src/pipeline_manager.cpp index 0f6ba839..72fc6127 100644 --- a/dynamic_vino_lib/src/pipeline_manager.cpp +++ b/dynamic_vino_lib/src/pipeline_manager.cpp @@ -29,6 +29,8 @@ #include "dynamic_vino_lib/inferences/head_pose_detection.h" #include "dynamic_vino_lib/inferences/face_reidentification.h" #include "dynamic_vino_lib/inferences/person_attribs_detection.h" +#include "dynamic_vino_lib/inferences/peak.h" +#include "dynamic_vino_lib/inferences/human_pose_estimation.h" #include "dynamic_vino_lib/inferences/vehicle_attribs_detection.h" #include "dynamic_vino_lib/inferences/license_plate_detection.h" #include "dynamic_vino_lib/inferences/landmarks_detection.h" @@ -42,6 +44,7 @@ #include "dynamic_vino_lib/models/emotion_detection_model.h" #include "dynamic_vino_lib/models/face_detection_model.h" #include "dynamic_vino_lib/models/head_pose_detection_model.h" +#include "dynamic_vino_lib/models/human_pose_estimation_model.h" #include "dynamic_vino_lib/models/object_detection_ssd_model.h" // #include "dynamic_vino_lib/models/object_detection_yolov2voc_model.h" #include "dynamic_vino_lib/models/face_reidentification_model.h" @@ -239,6 +242,10 @@ PipelineManager::parseInference(const Params::ParamManager::PipelineRawData& par { object = createHeadPoseEstimation(infer); } + else if (infer.name == kInferTpye_HumanPoseEstimation) + { + object = createHumanPoseEstimation(infer); + } else if (infer.name == kInferTpye_ObjectDetection) { object = createObjectDetection(infer); @@ -331,6 +338,22 @@ PipelineManager::createHeadPoseEstimation(const Params::ParamManager::InferenceR return infer; } +std::shared_ptr +PipelineManager::createHumanPoseEstimation( + const Params::ParamManager::InferenceRawData & infer_param) +{ + auto model = std::make_shared(infer_param.model, infer_param.batch); + model->modelInit(); + auto engine = engine_manager_.createEngine(infer_param.engine, model); + auto infer = std::make_shared(); + infer->loadNetwork(model); + infer->loadEngine(engine); + + // auto infer = std::make_shared(); + return infer; + +} + std::shared_ptr PipelineManager::createObjectDetection(const Params::ParamManager::InferenceRawData& infer) { diff --git a/people_msgs/CMakeLists.txt b/people_msgs/CMakeLists.txt index 6f50c35e..4e351e1e 100644 --- a/people_msgs/CMakeLists.txt +++ b/people_msgs/CMakeLists.txt @@ -31,6 +31,9 @@ add_message_files(DIRECTORY msg FILES EmotionsStamped.msg HeadPose.msg HeadPoseStamped.msg + HumanPose.msg + HumanPoseKeypoint.msg + HumanPoseStamped.msg PersonsStamped.msg ObjectInMask.msg ObjectsInMasks.msg @@ -50,6 +53,7 @@ add_service_files(FILES AgeGenderSrv.srv EmotionSrv.srv HeadPoseSrv.srv + HumanPoseSrv.srv PeopleSrv.srv ReidentificationSrv.srv ObjectsInMasksSrv.srv diff --git a/people_msgs/msg/HumanPose.msg b/people_msgs/msg/HumanPose.msg new file mode 100644 index 00000000..78452da2 --- /dev/null +++ b/people_msgs/msg/HumanPose.msg @@ -0,0 +1,17 @@ +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +people_msgs/HumanPoseKeypoint[] keypoints # Keypoints +float32 score # score +sensor_msgs/RegionOfInterest roi # region of interest for a person's body \ No newline at end of file diff --git a/people_msgs/msg/HumanPoseKeypoint.msg b/people_msgs/msg/HumanPoseKeypoint.msg new file mode 100644 index 00000000..a13caa02 --- /dev/null +++ b/people_msgs/msg/HumanPoseKeypoint.msg @@ -0,0 +1,2 @@ +float64 score +geometry_msgs/Point position \ No newline at end of file diff --git a/people_msgs/msg/HumanPoseStamped.msg b/people_msgs/msg/HumanPoseStamped.msg new file mode 100644 index 00000000..0a8bb2d9 --- /dev/null +++ b/people_msgs/msg/HumanPoseStamped.msg @@ -0,0 +1,16 @@ +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +std_msgs/Header header +people_msgs/HumanPose[] humanposes \ No newline at end of file diff --git a/people_msgs/srv/HumanPoseSrv.srv b/people_msgs/srv/HumanPoseSrv.srv new file mode 100644 index 00000000..4f364307 --- /dev/null +++ b/people_msgs/srv/HumanPoseSrv.srv @@ -0,0 +1,17 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +string image_path # input: an image +--- +HumanPose[] humanposes # ObjectInBox array \ No newline at end of file diff --git a/vino_launch/launch/pipeline_human_pose.launch b/vino_launch/launch/pipeline_human_pose.launch new file mode 100644 index 00000000..71f74406 --- /dev/null +++ b/vino_launch/launch/pipeline_human_pose.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/vino_launch/param/pipeline_human_pose.yaml b/vino_launch/param/pipeline_human_pose.yaml new file mode 100644 index 00000000..db05c2c5 --- /dev/null +++ b/vino_launch/param/pipeline_human_pose.yaml @@ -0,0 +1,28 @@ +Pipelines: +- name: people + # inputs: [StandardCamera] + inputs: [Image] + input_path: /home/cxs/my_code/ros_vino_ws/src/ros_openvino_toolkit/data/images/teams2.png + # inputs: [Video] + # input_path: /home/cxs/Videos/MOT16-06-raw.webm + infers: + - name: HumanPoseEstimation + model: /opt/openvino_toolkit/models/intel/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml + engine: CPU + label: to/be/set/xxx.labels + batch: 1 + min_peaks_distance: 3.0 + mid_points_score_threshold: 0.05 + found_mid_points_ratio_threshold: 0.8 + min_joints_number: 3 + min_subset_score: 0.2 + outputs: [ImageWindow, RosTopic, RViz] + connects: + - left: Image + # - left: Video + # - left: StandardCamera + right: [HumanPoseEstimation] + - left: HumanPoseEstimation + right: [ImageWindow, RosTopic, RViz] + +OpenvinoCommon: \ No newline at end of file diff --git a/vino_param_lib/include/vino_param_lib/param_manager.h b/vino_param_lib/include/vino_param_lib/param_manager.h index 74c29c3d..d8452379 100644 --- a/vino_param_lib/include/vino_param_lib/param_manager.h +++ b/vino_param_lib/include/vino_param_lib/param_manager.h @@ -70,6 +70,12 @@ class ParamManager // singleton int batch; float confidence_threshold = 0.5; bool enable_roi_constraint = false; + + float min_peaks_distance = 3.0; + float mid_points_score_threshold = 0.05; + float found_mid_points_ratio_threshold = 0.8; + int min_joints_number = 3; + float min_subset_score = 0.2; }; struct FilterRawData