diff --git a/skel-track-notes.md b/skel-track-notes.md new file mode 100644 index 00000000..46a1b036 --- /dev/null +++ b/skel-track-notes.md @@ -0,0 +1,34 @@ +# Notes for Skeleton Tracking Package + +* Model URL: https://docs.openvinotoolkit.org/latest/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html + +## Installation + +```bash +$ cd /opt/openvino_toolkit/open_model_zoo/tools/downloader +$ python3 downloader.py --name human-pose-estimation-0001 +``` + +## Keypoints + +| Id | Keypoint Name | +| -- | -- | +| 0 | nose | +| 1 | neck | +| 2 | right shoulder | +| 3 | right elbow | +| 4 | right wrist | +| 5 | left shoulder | +| 6 | left elbow | +| 7 | left wrist | +| 8 | right hip | +| 9 | right knee | +| 10 | right ankle | +| 11 | left hip | +| 12 | left knee | +| 13 | left ankle | +| 14 | right eye | +| 15 | left eye | +| 16 | right ear | +| 17 | left ear | + diff --git a/vino_core_lib/CMakeLists.txt b/vino_core_lib/CMakeLists.txt index ebbdc780..eac9c900 100644 --- a/vino_core_lib/CMakeLists.txt +++ b/vino_core_lib/CMakeLists.txt @@ -147,6 +147,8 @@ add_library(${PROJECT_NAME} SHARED src/inferences/object_detection.cpp src/inferences/object_segmentation.cpp src/inferences/person_reidentification.cpp + src/inferences/human_pose_estimation.cpp + src/inferences/peak.cpp src/inputs/realsense_camera.cpp src/inputs/realsense_camera_topic.cpp src/inputs/standard_camera.cpp @@ -160,6 +162,7 @@ add_library(${PROJECT_NAME} SHARED src/models/object_detection_model.cpp src/models/object_segmentation_model.cpp src/models/person_reidentification_model.cpp + src/models/human_pose_estimation_model.cpp src/outputs/image_window_output.cpp src/outputs/ros_topic_output.cpp src/outputs/rviz_output.cpp diff --git a/vino_core_lib/include/vino_core_lib/inferences/base_inference.h b/vino_core_lib/include/vino_core_lib/inferences/base_inference.h index 6e3011e2..2260de98 100644 --- a/vino_core_lib/include/vino_core_lib/inferences/base_inference.h +++ b/vino_core_lib/include/vino_core_lib/inferences/base_inference.h @@ -85,6 +85,7 @@ class Result friend class BaseInference; explicit Result(const cv::Rect& location); inline const cv::Rect getLocation() const { return location_; } + inline void setLocation(const cv::Rect& location) { location_ = location; } private: cv::Rect location_; diff --git a/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h new file mode 100644 index 00000000..b1bd74e6 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/inferences/human_pose_estimation.h @@ -0,0 +1,215 @@ +// TODO add license + +/** + * @brief A header file with declaration for AgeGenderDetection Class + * @file human_pose_estimation.h + */ +#ifndef VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H +#define VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H + +#include +#include +#include + +#include "vino_core_lib/engines/engine.h" +#include "vino_core_lib/inferences/base_inference.h" +#include "vino_core_lib/models/human_pose_estimation_model.h" +#include "inference_engine.hpp" +#include "opencv2/opencv.hpp" + +namespace vino_core_lib +{ + +class HumanPoseKeypoint : public cv::Point2f +{ +public: + HumanPoseKeypoint(float x, float y, float score) + : HumanPoseKeypoint::Point_(x, y), + score(score) {}; + HumanPoseKeypoint(cv::Point2f point) + : HumanPoseKeypoint::Point_(point), + score(score) {}; + float score; +}; + +/** + * @class HumanPoseResult + * @brief Class for storing and processing age and gender detection result. + */ +class HumanPoseResult : public Result +{ + public: + explicit HumanPoseResult(const cv::Rect& location); + HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, // = std::vector(), + const float& score); // = 0); + + // Following similar structure of vino_core_lib/inferences/object_detection.h + // and human_pose_estimation_demo/src/human_pose_estimator.h + + /** + * @brief Get the age keypoints of the estimated pose from the result. + * @return The estimated keypoints. + */ + std::vector getKeypoints() const + { + return keypoints; + } + + /** + * @brief Get the score of the estimated pose from the result. + * @return The score of the estimation. + */ + float getScore() const + { + return score; + } + + std::vector keypointsScores; + std::vector keypoints; + float score = -1; +}; + + +/** + * @class HumanPoseEstimation + * @brief Class to load the human pose estimation model and perform + human pose estimation. + */ +class HumanPoseEstimation : public BaseInference +{ + public: + using Result = vino_core_lib::HumanPoseResult; + HumanPoseEstimation(float minPeaksDistance, + float midPointsScoreThreshold, + float foundMidPointsRatioThreshold, + int minJointsNumber, + float minSubsetScore); + ~HumanPoseEstimation() override; + /** + * @brief Load the age gender detection model. + */ + void loadNetwork(std::shared_ptr); + /** + * @brief Enqueue a frame to this class. + * The frame will be buffered but not inferred yet. + * @param[in] frame The frame to be enqueued. + * @param[in] input_frame_loc The location of the enqueued frame with respect + * to the frame generated by the input device. + * @return Whether this operation is successful. + */ + bool enqueue(const cv::Mat& frame, const cv::Rect&) override; + /** + * @brief Start inference for all buffered frames. + * @return Whether this operation is successful. + */ + bool submitRequest() override; + /** + * @brief This function will fetch the results of the previous inference and + * stores the results in a result buffer array. All buffered frames will be + * cleared. + * @return Whether the Inference object fetches a result this time + */ + bool fetchResults() override; + /** + * @brief Get the length of the buffer result array. + * @return The length of the buffer result array. + */ + const int getResultsLength() const override; + /** + * @brief Get the location of result with respect + * to the frame generated by the input device. + * @param[in] idx The index of the result. + */ + const vino_core_lib::Result* getLocationResult(int idx) const override; + /** + * @brief Get the name of the Inference instance. + * @return The name of the Inference instance. + */ + const std::string getName() const override; + /** + * @brief Show the observed detection result either through image window + * or ROS topic. + */ + const void observeOutput( + const std::shared_ptr& output) override; + + private: + /** + * @brief Processess the network's outputs to extract the valid poses. + * + * Copied from: https://github.com/opencv/open_model_zoo/blob/master/demos/human_pose_estimation_demo/src/human_pose_estimator.cpp + * + * @param heatMapsData Data outputted by the heatmap network. + * @param heatMapOffset Size of each heatmap result. + * @param nHeatMaps Number of keypoints. + * @param pafsData Data outputted by the PAF network. + * @param pafOffset Size of each PAF result. + * @param nPafs Numver of PAFs. + * @param featureMapWidth Width of the heatmap. + * @param featureMapHeight Height of the heatmap. + * @param imageSize Size of the input image. + * @return std::vector A vector with the detected poses. + */ + std::vector postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const; + + /** + * @brief Resizes the heatmap by upSampleRatio. + * + * @param featureMaps A vector with the heatmaps to resize. + */ + void resizeFeatureMaps(std::vector& featureMaps) const; + + /** + * @brief Extracts the poses from the given heatmaps and PAFs. + * + * @param heatMaps Postprocessed heatmaps. + * @param pafs Postprocessed PAFs. + * @return std::vector The detected poses. + */ + std::vector extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const; + + /** + * @brief Aligns the poses' keypoints to the input image. + * + * @param poses Poses (extracted from the heatmaps and PAFs). + * @param featureMapsSize The size of the heatmaps. + * @param imageSize The input image size. + */ + void correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const; + + /** + * @brief Correct the bonding boxes based on the poses' keypoints. + * + * @param poses Poses (with corrected keypoints). + */ + void correctROI(std::vector& poses) const; + + std::shared_ptr valid_model_; + std::vector results_; + int width_ = 0; + int height_ = 0; + + const size_t keypointsNumber_ = 18; + int upsampleRatio_; + float minPeaksDistance_; + float midPointsScoreThreshold_; + float foundMidPointsRatioThreshold_; + int minJointsNumber_; + float minSubsetScore_; + int stride_; + cv::Vec4i pad_; +}; + +} // namespace vino_core_lib + +#endif // VINO_CORE_LIB_INFERENCES_HUMAN_POSE_ESTIMATION_H \ No newline at end of file diff --git a/vino_core_lib/include/vino_core_lib/inferences/peak.h b/vino_core_lib/include/vino_core_lib/inferences/peak.h new file mode 100644 index 00000000..e0310ce8 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/inferences/peak.h @@ -0,0 +1,87 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +// #include "human_pose.hpp" +#include "vino_core_lib/inferences/human_pose_estimation.h" + +namespace human_pose_estimation +{ + +using Result = vino_core_lib::HumanPoseResult; + +struct Peak +{ + Peak(const int id = -1, + const cv::Point2f &pos = cv::Point2f(), + const float score = 0.0f); + + int id; + cv::Point2f pos; + float score; +}; + +struct HumanPoseByPeaksIndices +{ + explicit HumanPoseByPeaksIndices(const int keypointsNumber); + + std::vector peaksIndices; + int nJoints; + float score; +}; + +struct TwoJointsConnection +{ + TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score); + + int firstJointIdx; + int secondJointIdx; + float score; +}; + +void findPeaks(const std::vector &heatMaps, + const float minPeaksDistance, + std::vector> &allPeaks, + int heatMapId); + +std::vector groupPeaksToPoses( + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore); + +class FindPeaksBody : public cv::ParallelLoopBody +{ +public: + FindPeaksBody(const std::vector &heatMaps, float minPeaksDistance, + std::vector> &peaksFromHeatMap) + : heatMaps(heatMaps), + minPeaksDistance(minPeaksDistance), + peaksFromHeatMap(peaksFromHeatMap) {} + + virtual void operator()(const cv::Range &range) const + { + for (int i = range.start; i < range.end; i++) + { + findPeaks(heatMaps, minPeaksDistance, peaksFromHeatMap, i); + } + } + +private: + const std::vector &heatMaps; + float minPeaksDistance; + std::vector> &peaksFromHeatMap; +}; + +} // namespace human_pose_estimation diff --git a/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h b/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h new file mode 100644 index 00000000..f96ddeb3 --- /dev/null +++ b/vino_core_lib/include/vino_core_lib/models/human_pose_estimation_model.h @@ -0,0 +1,67 @@ +// TODO: Add license + +/** + * @brief A header file with declaration for FaceDetectionModel Class + * @file human_pose_estimation_model.h + */ + +#ifndef VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H +#define VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H + +#include +#include "vino_core_lib/models/base_model.h" + +namespace Models +{ +/** + * @class HumanPoseEstimationModel + * @brief This class generates the human pose estimation model. + */ +class HumanPoseEstimationModel : public BaseModel +{ +public: + HumanPoseEstimationModel(const std::string&, int, int, int); + + /** + * @brief Get the input name. + * @return Input name. + */ + inline const std::string getInputName() const + { + return input_; + } + /** + * @brief Get the age from the detection reuslt. + * @return Detected age. + */ + inline const std::string getOutputKeypointsName() const + { + return output_keypoints_; + } + /** + * @brief Get the gender from the detection reuslt. + * @return Detected gender. + */ + inline const std::string getOutputHeatmapName() const + { + return output_heatmap_; + } + /** + * @brief Get the name of this detection model. + * @return Name of the model. + */ + const std::string getModelName() const override; + +protected: + void checkLayerProperty(const InferenceEngine::CNNNetReader::Ptr&) override; + void setLayerProperty(InferenceEngine::CNNNetReader::Ptr) override; + + private: + std::string input_; + std::string output_keypoints_; + std::string output_heatmap_; +}; + +} // namespace Models + +#endif // VINO_CORE_LIB_MODELS_HUMAN_POSE_ESTIMATION_MODEL_H \ No newline at end of file diff --git a/vino_core_lib/include/vino_core_lib/outputs/base_output.h b/vino_core_lib/include/vino_core_lib/outputs/base_output.h index 5fa5bfce..0f0a0401 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/base_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/base_output.h @@ -33,6 +33,7 @@ #include "vino_core_lib/inferences/object_detection.h" #include "vino_core_lib/inferences/object_segmentation.h" #include "vino_core_lib/inferences/person_reidentification.h" +#include "vino_core_lib/inferences/human_pose_estimation.h" #include "vino_core_lib/services/frame_processing_server.h" #include "opencv2/opencv.hpp" @@ -97,6 +98,12 @@ class BaseOutput virtual void accept(const std::vector &) { } + /** + * @brief Generate output content according to the human pose estimation result. + */ + virtual void accept(const std::vector &) + { + } /** * @brief Calculate the camera matrix of a frame for image window output, no implementation for ros topic output. @@ -126,6 +133,8 @@ class BaseOutput boost::shared_ptr response) {} virtual void setServiceResponse( boost::shared_ptr response) {} + virtual void setServiceResponse( + boost::shared_ptr response) {} Pipeline* getPipeline() const; cv::Mat getFrame() const; virtual void clearData() {} diff --git a/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h b/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h index ce381a6b..4fb41274 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/image_window_output.h @@ -95,6 +95,14 @@ class ImageWindowOutput : public BaseOutput * @param[in] An object segmentation result objetc. */ void accept(const std::vector &) override; + + /** + * @brief Generate image window output content according to + * the human pose estimation result. + * @param[in] An human pose estimation result objetc. + */ + void accept(const std::vector &) override; + /** * @brief Merge mask for image window ouput * the object segmentation result. @@ -128,6 +136,7 @@ class ImageWindowOutput : public BaseOutput cv::Point hp_y; // for headpose, end point of yAxis cv::Point hp_zs; // for headpose, start point of zAxis cv::Point hp_ze; // for headpose, end point of zAxis + std::vector kp; // for humanpose, keypoints }; std::vector outputs_; diff --git a/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h b/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h index 373ace30..9615c7c3 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/ros_service_output.h @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -41,7 +43,7 @@ #include #include #include - +#include #include @@ -78,6 +80,7 @@ class RosServiceOutput : public RosTopicOutput void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); void setServiceResponse(boost::shared_ptr response); + void setServiceResponse(boost::shared_ptr response); private: const std::string service_name_; diff --git a/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h b/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h index 5b783d98..adfc563d 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/ros_topic_output.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -109,6 +111,13 @@ class RosTopicOutput : public BaseOutput */ void accept(const std::vector&) override; + /** + * @brief Generate ros topic infomation according to + * the human pose estimation result. + * @param[in] An human pose estimation result objetc. + */ + void accept(const std::vector&) override; + private: std_msgs::Header getHeader(); @@ -130,6 +139,8 @@ class RosTopicOutput : public BaseOutput std::shared_ptr person_reid_msg_ptr_; ros::Publisher pub_segmented_object_; std::shared_ptr segmented_object_msg_ptr_; + ros::Publisher pub_human_pose_; + std::shared_ptr human_pose_msg_ptr_; }; } // namespace Outputs diff --git a/vino_core_lib/include/vino_core_lib/outputs/rviz_output.h b/vino_core_lib/include/vino_core_lib/outputs/rviz_output.h index e932ec20..82a5c6b0 100644 --- a/vino_core_lib/include/vino_core_lib/outputs/rviz_output.h +++ b/vino_core_lib/include/vino_core_lib/outputs/rviz_output.h @@ -92,6 +92,15 @@ class RvizOutput : public BaseOutput * @param[in] An object segmentation result objetc. */ void accept(const std::vector &) override; + + /** + * @brief Generate rviz output content according to + * the human pose estimation result. + * @param[in] An object segmentation result objetc. + */ + void accept(const std::vector &) override; + + // TODO remove comment. /** * @brief Merge mask for image window ouput * the object segmentation result. diff --git a/vino_core_lib/include/vino_core_lib/pipeline_manager.h b/vino_core_lib/include/vino_core_lib/pipeline_manager.h index 2ce98467..3a6d81db 100644 --- a/vino_core_lib/include/vino_core_lib/pipeline_manager.h +++ b/vino_core_lib/include/vino_core_lib/pipeline_manager.h @@ -104,6 +104,8 @@ class PipelineManager { const Params::ParamManager::InferenceParams& infer); std::shared_ptr createPersonReidentification( const Params::ParamManager::InferenceParams& infer); + std::shared_ptr createHumanPoseEstimation( + const Params::ParamManager::InferenceParams& infer); std::map pipelines_; std::map plugins_for_devices_; }; diff --git a/vino_core_lib/include/vino_core_lib/pipeline_params.h b/vino_core_lib/include/vino_core_lib/pipeline_params.h index 676e2e5e..2a934d42 100644 --- a/vino_core_lib/include/vino_core_lib/pipeline_params.h +++ b/vino_core_lib/include/vino_core_lib/pipeline_params.h @@ -55,6 +55,7 @@ const char kInferTpye_HeadPoseEstimation[] = "HeadPoseEstimation"; const char kInferTpye_ObjectDetection[] = "ObjectDetection"; const char kInferTpye_ObjectSegmentation[] = "ObjectSegmentation"; const char kInferTpye_PersonReidentification[] = "PersonReidentification"; +const char kInferTpye_HumanPoseEstimation[] = "HumanPoseEstimation"; /** * @class PipelineParams diff --git a/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h b/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h index b69d54c6..7d1cf431 100644 --- a/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h +++ b/vino_core_lib/include/vino_core_lib/services/frame_processing_server.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/vino_core_lib/src/inferences/human_pose_estimation.cpp b/vino_core_lib/src/inferences/human_pose_estimation.cpp new file mode 100644 index 00000000..f1f9bb8a --- /dev/null +++ b/vino_core_lib/src/inferences/human_pose_estimation.cpp @@ -0,0 +1,262 @@ +// TODO add license + +/** + * @brief a header file with declaration of HumanPoseResult class //TODO update + * @file human_pose_estimation.cpp + * + * This file was mostly based on age_gender_detection.cpp. It also uses the code + * from object_detection.cpp. + */ + +#include +#include + +#include "vino_core_lib/inferences/human_pose_estimation.h" +#include "vino_core_lib/outputs/base_output.h" +#include "vino_core_lib/inferences/peak.h" + +// HumanPoseResult +vino_core_lib::HumanPoseResult::HumanPoseResult(const cv::Rect& location) + : Result(location) +{ +} + +vino_core_lib::HumanPoseResult::HumanPoseResult( + const cv::Rect& location, + const std::vector& keypoints, + const float& score) + : keypoints(keypoints), + score(score), + Result(location) +{ +} + +// HumanPoseEstimation +vino_core_lib::HumanPoseEstimation::HumanPoseEstimation( + float minPeaksDistance, + float midPointsScoreThreshold, + float foundMidPointsRatioThreshold, + int minJointsNumber, + float minSubsetScore) + : vino_core_lib::BaseInference(), + upsampleRatio_(4), + minPeaksDistance_(minPeaksDistance), + midPointsScoreThreshold_(midPointsScoreThreshold), + foundMidPointsRatioThreshold_(foundMidPointsRatioThreshold), + minJointsNumber_(minJointsNumber), + minSubsetScore_(minSubsetScore), + stride_(8), + pad_(cv::Vec4i::all(0)) +{ +} + +vino_core_lib::HumanPoseEstimation::~HumanPoseEstimation() = default; + +void vino_core_lib::HumanPoseEstimation::loadNetwork( + std::shared_ptr network) +{ + valid_model_ = network; + setMaxBatchSize(network->getMaxBatchSize()); +} + +bool vino_core_lib::HumanPoseEstimation::enqueue( + const cv::Mat& frame, const cv::Rect& input_frame_loc) +{ + // object_detection.cpp + if (width_ == 0 && height_ == 0) + { + width_ = frame.cols; + height_ = frame.rows; + } + + if (!vino_core_lib::BaseInference::enqueue( + frame, input_frame_loc, 1, 0, valid_model_->getInputName())) + { + return false; + } + Result r(input_frame_loc); + results_.clear(); + results_.emplace_back(r); + return true; +} + +bool vino_core_lib::HumanPoseEstimation::submitRequest() +{ + return vino_core_lib::BaseInference::submitRequest(); +} + +bool vino_core_lib::HumanPoseEstimation::fetchResults() +{ + bool can_fetch = vino_core_lib::BaseInference::fetchResults(); + if (!can_fetch) return false; + auto request = getEngine()->getRequest(); + InferenceEngine::Blob::Ptr keypointsBlob = + request->GetBlob(valid_model_->getOutputKeypointsName()); + InferenceEngine::Blob::Ptr heatmapBlob = + request->GetBlob(valid_model_->getOutputHeatmapName()); + + results_.clear(); + CV_Assert(heatmapBlob->getTensorDesc().getDims()[1] == keypointsNumber_ + 1); + InferenceEngine::SizeVector heatMapDims = + heatmapBlob->getTensorDesc().getDims(); + //std::vector poses = postprocess( + results_ = postprocess( + heatmapBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsNumber_, + keypointsBlob->buffer(), + heatMapDims[2] * heatMapDims[3], + keypointsBlob->getTensorDesc().getDims()[1], + heatMapDims[3], heatMapDims[2], cv::Size(width_, height_)); + return true; +} + +const int vino_core_lib::HumanPoseEstimation::getResultsLength() const +{ + return static_cast(results_.size()); +} + +const vino_core_lib::Result* +vino_core_lib::HumanPoseEstimation::getLocationResult(int idx) const +{ + return &(results_[idx]); +} + +const std::string vino_core_lib::HumanPoseEstimation::getName() const +{ + return valid_model_->getModelName(); +} + +const void vino_core_lib::HumanPoseEstimation::observeOutput( + const std::shared_ptr& output) +{ + if (output != nullptr) + { + output->accept(results_); + } +} + +using Result = vino_core_lib::HumanPoseResult; + +std::vector vino_core_lib::HumanPoseEstimation::postprocess( + const float* heatMapsData, const int heatMapOffset, const int nHeatMaps, + const float* pafsData, const int pafOffset, const int nPafs, + const int featureMapWidth, const int featureMapHeight, + const cv::Size& imageSize) const +{ + std::vector heatMaps(nHeatMaps); + for (size_t i = 0; i < heatMaps.size(); i++) + { + heatMaps[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + heatMapsData + i * heatMapOffset))); + } + + resizeFeatureMaps(heatMaps); + + std::vector pafs(nPafs); + for (size_t i = 0; i < pafs.size(); i++) + { + pafs[i] = cv::Mat(featureMapHeight, featureMapWidth, CV_32FC1, + reinterpret_cast( + const_cast( + pafsData + i * pafOffset))); + } + resizeFeatureMaps(pafs); + + std::vector poses = extractPoses(heatMaps, pafs); + correctCoordinates(poses, heatMaps[0].size(), imageSize); + correctROI(poses); + return poses; +} + +std::vector vino_core_lib::HumanPoseEstimation::extractPoses( + const std::vector& heatMaps, + const std::vector& pafs) const +{ + std::vector> peaksFromHeatMap(heatMaps.size()); + human_pose_estimation::FindPeaksBody findPeaksBody(heatMaps, minPeaksDistance_, peaksFromHeatMap); + cv::parallel_for_(cv::Range(0, static_cast(heatMaps.size())), findPeaksBody); + int peaksBefore = 0; + for (size_t heatmapId = 1; heatmapId < heatMaps.size(); heatmapId++) + { + peaksBefore += static_cast(peaksFromHeatMap[heatmapId - 1].size()); + for (auto& peak : peaksFromHeatMap[heatmapId]) + { + peak.id += peaksBefore; + } + } + std::vector poses = groupPeaksToPoses( + peaksFromHeatMap, pafs, keypointsNumber_, midPointsScoreThreshold_, + foundMidPointsRatioThreshold_, minJointsNumber_, minSubsetScore_); + return poses; +} + +void vino_core_lib::HumanPoseEstimation::resizeFeatureMaps( + std::vector& featureMaps) const +{ + for (auto& featureMap : featureMaps) + { + cv::resize(featureMap, featureMap, cv::Size(), + upsampleRatio_, upsampleRatio_, cv::INTER_CUBIC); + } +} + +void vino_core_lib::HumanPoseEstimation::correctCoordinates(std::vector& poses, + const cv::Size& featureMapsSize, + const cv::Size& imageSize) const +{ + CV_Assert(stride_ % upsampleRatio_ == 0); + + cv::Size fullFeatureMapSize = featureMapsSize * stride_ / upsampleRatio_; + + float scaleX = imageSize.width / + static_cast(fullFeatureMapSize.width - pad_(1) - pad_(3)); + float scaleY = imageSize.height / + static_cast(fullFeatureMapSize.height - pad_(0) - pad_(2)); + for (auto& pose : poses) + { + for (auto& keypoint : pose.keypoints) + { + if (keypoint != cv::Point2f(-1, -1)) + { + keypoint.x *= stride_ / upsampleRatio_; + keypoint.x -= pad_(1); + keypoint.x *= scaleX; + + keypoint.y *= stride_ / upsampleRatio_; + keypoint.y -= pad_(0); + keypoint.y *= scaleY; + } + } + } +} + +void vino_core_lib::HumanPoseEstimation::correctROI( + std::vector& poses) const +{ + for (auto& pose : poses) + { + int xMin = width_; + int xMax = 0; + int yMin = height_; + int yMax = 0; + for (auto& kp: pose.keypoints) + { + if (kp.x < 0) continue; + + int x = static_cast(kp.x); + int y = static_cast(kp.y); + + if (x > xMax) xMax = x; + if (x < xMin) xMin = x; + + if (y > yMax) yMax = y; + if (y < yMin) yMin = y; + } + // slog::info << "rect at: (" << xMin << ", " << yMin << "), (" << xMax << ", " << yMax << ")" << slog::endl; + cv::Rect newLocation = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin); + pose.setLocation(newLocation); + } +} \ No newline at end of file diff --git a/vino_core_lib/src/inferences/peak.cpp b/vino_core_lib/src/inferences/peak.cpp new file mode 100644 index 00000000..532fb6ae --- /dev/null +++ b/vino_core_lib/src/inferences/peak.cpp @@ -0,0 +1,370 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "vino_core_lib/inferences/peak.h" + +namespace human_pose_estimation +{ +Peak::Peak(const int id, const cv::Point2f &pos, const float score) + : id(id), + pos(pos), + score(score) {} + +HumanPoseByPeaksIndices::HumanPoseByPeaksIndices(const int keypointsNumber) + : peaksIndices(std::vector(keypointsNumber, -1)), + nJoints(0), + score(0.0f) {} + +TwoJointsConnection::TwoJointsConnection(const int firstJointIdx, + const int secondJointIdx, + const float score) + : firstJointIdx(firstJointIdx), + secondJointIdx(secondJointIdx), + score(score) {} + +void findPeaks(const std::vector &heatMaps, + const float minPeaksDistance, + std::vector> &allPeaks, + int heatMapId) +{ + const float threshold = 0.1f; + std::vector peaks; + const cv::Mat &heatMap = heatMaps[heatMapId]; + const float *heatMapData = heatMap.ptr(); + size_t heatMapStep = heatMap.step1(); + for (int y = -1; y < heatMap.rows + 1; y++) + { + for (int x = -1; x < heatMap.cols + 1; x++) + { + float val = 0; + if (x >= 0 && y >= 0 && x < heatMap.cols && y < heatMap.rows) + { + val = heatMapData[y * heatMapStep + x]; + val = val >= threshold ? val : 0; + } + + float left_val = 0; + if (y >= 0 && x < (heatMap.cols - 1) && y < heatMap.rows) + { + left_val = heatMapData[y * heatMapStep + x + 1]; + left_val = left_val >= threshold ? left_val : 0; + } + + float right_val = 0; + if (x > 0 && y >= 0 && y < heatMap.rows) + { + right_val = heatMapData[y * heatMapStep + x - 1]; + right_val = right_val >= threshold ? right_val : 0; + } + + float top_val = 0; + if (x >= 0 && x < heatMap.cols && y < (heatMap.rows - 1)) + { + top_val = heatMapData[(y + 1) * heatMapStep + x]; + top_val = top_val >= threshold ? top_val : 0; + } + + float bottom_val = 0; + if (x >= 0 && y > 0 && x < heatMap.cols) + { + bottom_val = heatMapData[(y - 1) * heatMapStep + x]; + bottom_val = bottom_val >= threshold ? bottom_val : 0; + } + + if ((val > left_val) && (val > right_val) && (val > top_val) && (val > bottom_val)) + { + peaks.push_back(cv::Point(x, y)); + } + } + } + std::sort(peaks.begin(), peaks.end(), [](const cv::Point &a, const cv::Point &b) { + return a.x < b.x; + }); + std::vector isActualPeak(peaks.size(), true); + int peakCounter = 0; + std::vector &peaksWithScoreAndID = allPeaks[heatMapId]; + for (size_t i = 0; i < peaks.size(); i++) + { + if (isActualPeak[i]) + { + for (size_t j = i + 1; j < peaks.size(); j++) + { + if (sqrt((peaks[i].x - peaks[j].x) * (peaks[i].x - peaks[j].x) + + (peaks[i].y - peaks[j].y) * (peaks[i].y - peaks[j].y)) < minPeaksDistance) + { + isActualPeak[j] = false; + } + } + peaksWithScoreAndID.push_back(Peak(peakCounter++, peaks[i], heatMap.at(peaks[i]))); + } + } +} + +std::vector groupPeaksToPoses( + const std::vector> &allPeaks, + const std::vector &pafs, + const size_t keypointsNumber, + const float midPointsScoreThreshold, + const float foundMidPointsRatioThreshold, + const int minJointsNumber, + const float minSubsetScore) +{ + const std::vector> limbIdsHeatmap = { + {2, 3}, {2, 6}, {3, 4}, {4, 5}, {6, 7}, {7, 8}, {2, 9}, + {9, 10}, {10, 11}, {2, 12}, {12, 13}, {13, 14}, {2, 1}, + {1, 15}, {15, 17}, {1, 16}, {16, 18}, {3, 17}, {6, 18}}; + const std::vector> limbIdsPaf = { + {31, 32}, {39, 40}, {33, 34}, {35, 36}, {41, 42}, {43, 44}, {19, 20}, + {21, 22}, {23, 24}, {25, 26}, {27, 28}, {29, 30}, {47, 48}, {49, 50}, + {53, 54}, {51, 52}, {55, 56}, {37, 38}, {45, 46}}; + + std::vector candidates; + for (const auto &peaks : allPeaks) + { + candidates.insert(candidates.end(), peaks.begin(), peaks.end()); + } + std::vector subset(0, HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t k = 0; k < limbIdsPaf.size(); k++) + { + std::vector connections; + const int mapIdxOffset = keypointsNumber + 1; + std::pair scoreMid = {pafs[limbIdsPaf[k].first - mapIdxOffset], + pafs[limbIdsPaf[k].second - mapIdxOffset]}; + const int idxJointA = limbIdsHeatmap[k].first - 1; + const int idxJointB = limbIdsHeatmap[k].second - 1; + const std::vector &candA = allPeaks[idxJointA]; + const std::vector &candB = allPeaks[idxJointB]; + const size_t nJointsA = candA.size(); + const size_t nJointsB = candB.size(); + if (nJointsA == 0 && nJointsB == 0) + { + continue; + } + else if (nJointsA == 0) + { + for (size_t i = 0; i < nJointsB; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointB] == candB[i].id) + { + num++; + continue; + } + } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointB] = candB[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candB[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + else if (nJointsB == 0) + { + for (size_t i = 0; i < nJointsA; i++) + { + int num = 0; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == candA[i].id) + { + num++; + continue; + } + } + if (num == 0) + { + HumanPoseByPeaksIndices personKeypoints(keypointsNumber); + personKeypoints.peaksIndices[idxJointA] = candA[i].id; + personKeypoints.nJoints = 1; + personKeypoints.score = candA[i].score; + subset.push_back(personKeypoints); + } + } + continue; + } + + std::vector tempJointConnections; + for (size_t i = 0; i < nJointsA; i++) + { + for (size_t j = 0; j < nJointsB; j++) + { + cv::Point2f pt = candA[i].pos * 0.5 + candB[j].pos * 0.5; + cv::Point mid = cv::Point(cvRound(pt.x), cvRound(pt.y)); + cv::Point2f vec = candB[j].pos - candA[i].pos; + double norm_vec = cv::norm(vec); + if (norm_vec == 0) + { + continue; + } + vec /= norm_vec; + float score = vec.x * scoreMid.first.at(mid) + vec.y * scoreMid.second.at(mid); + int height_n = pafs[0].rows / 2; + float suc_ratio = 0.0f; + float mid_score = 0.0f; + const int mid_num = 10; + const float scoreThreshold = -100.0f; + if (score > scoreThreshold) + { + float p_sum = 0; + int p_count = 0; + cv::Size2f step((candB[j].pos.x - candA[i].pos.x) / (mid_num - 1), + (candB[j].pos.y - candA[i].pos.y) / (mid_num - 1)); + for (int n = 0; n < mid_num; n++) + { + cv::Point midPoint(cvRound(candA[i].pos.x + n * step.width), + cvRound(candA[i].pos.y + n * step.height)); + cv::Point2f pred(scoreMid.first.at(midPoint), + scoreMid.second.at(midPoint)); + score = vec.x * pred.x + vec.y * pred.y; + if (score > midPointsScoreThreshold) + { + p_sum += score; + p_count++; + } + } + suc_ratio = static_cast(p_count / mid_num); + float ratio = p_count > 0 ? p_sum / p_count : 0.0f; + mid_score = ratio + static_cast(std::min(height_n / norm_vec - 1, 0.0)); + } + if (mid_score > 0 && suc_ratio > foundMidPointsRatioThreshold) + { + tempJointConnections.push_back(TwoJointsConnection(i, j, mid_score)); + } + } + } + if (!tempJointConnections.empty()) + { + std::sort(tempJointConnections.begin(), tempJointConnections.end(), + [](const TwoJointsConnection &a, + const TwoJointsConnection &b) { + return (a.score > b.score); + }); + } + size_t num_limbs = std::min(nJointsA, nJointsB); + size_t cnt = 0; + std::vector occurA(nJointsA, 0); + std::vector occurB(nJointsB, 0); + for (size_t row = 0; row < tempJointConnections.size(); row++) + { + if (cnt == num_limbs) + { + break; + } + const int &indexA = tempJointConnections[row].firstJointIdx; + const int &indexB = tempJointConnections[row].secondJointIdx; + const float &score = tempJointConnections[row].score; + if (occurA[indexA] == 0 && occurB[indexB] == 0) + { + connections.push_back(TwoJointsConnection(candA[indexA].id, candB[indexB].id, score)); + cnt++; + occurA[indexA] = 1; + occurB[indexB] = 1; + } + } + if (connections.empty()) + { + continue; + } + + bool extraJointConnections = (k == 17 || k == 18); + if (k == 0) + { + subset = std::vector( + connections.size(), HumanPoseByPeaksIndices(keypointsNumber)); + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + subset[i].peaksIndices[idxJointA] = indexA; + subset[i].peaksIndices[idxJointB] = indexB; + subset[i].nJoints = 2; + subset[i].score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + } + } + else if (extraJointConnections) + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA && subset[j].peaksIndices[idxJointB] == -1) + { + subset[j].peaksIndices[idxJointB] = indexB; + } + else if (subset[j].peaksIndices[idxJointB] == indexB && subset[j].peaksIndices[idxJointA] == -1) + { + subset[j].peaksIndices[idxJointA] = indexA; + } + } + } + continue; + } + else + { + for (size_t i = 0; i < connections.size(); i++) + { + const int &indexA = connections[i].firstJointIdx; + const int &indexB = connections[i].secondJointIdx; + bool num = false; + for (size_t j = 0; j < subset.size(); j++) + { + if (subset[j].peaksIndices[idxJointA] == indexA) + { + subset[j].peaksIndices[idxJointB] = indexB; + subset[j].nJoints++; + subset[j].score += candidates[indexB].score + connections[i].score; + num = true; + } + } + if (!num) + { + HumanPoseByPeaksIndices hpWithScore(keypointsNumber); + hpWithScore.peaksIndices[idxJointA] = indexA; + hpWithScore.peaksIndices[idxJointB] = indexB; + hpWithScore.nJoints = 2; + hpWithScore.score = candidates[indexA].score + candidates[indexB].score + connections[i].score; + subset.push_back(hpWithScore); + } + } + } + } + std::vector poses; + for (const auto &subsetI : subset) + { + if (subsetI.nJoints < minJointsNumber || subsetI.score / subsetI.nJoints < minSubsetScore) + { + continue; + } + int position = -1; + Result pose(cv::Rect(), + std::vector(keypointsNumber, cv::Point2f(-1.0f, -1.0f)), + subsetI.score * std::max(0, subsetI.nJoints - 1)); + for (const auto &peakIdx : subsetI.peaksIndices) + { + position++; + if (peakIdx >= 0) + { + pose.keypoints[position] = candidates[peakIdx].pos; + pose.keypoints[position].x += 0.5; + pose.keypoints[position].y += 0.5; + pose.keypoints[position].score = candidates[peakIdx].score; + } + } + poses.push_back(pose); + } + return poses; +} +} // namespace human_pose_estimation diff --git a/vino_core_lib/src/models/human_pose_estimation_model.cpp b/vino_core_lib/src/models/human_pose_estimation_model.cpp new file mode 100644 index 00000000..0875086a --- /dev/null +++ b/vino_core_lib/src/models/human_pose_estimation_model.cpp @@ -0,0 +1,140 @@ +// TODO add license + +/* +Model URL: https://docs.openvinotoolkit.org/2019_R3.1/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html +*/ + +#include +#include + +#include "vino_core_lib/models/human_pose_estimation_model.h" +#include "vino_core_lib/slog.h" + +Models::HumanPoseEstimationModel::HumanPoseEstimationModel( + const std::string& model_loc, + int input_num, int output_num, + int max_batch_size) + : BaseModel(model_loc, input_num, output_num, max_batch_size) +{ +} + +void Models::HumanPoseEstimationModel::setLayerProperty( + InferenceEngine::CNNNetReader::Ptr net_reader) +{ + // set input property + InferenceEngine::InputsDataMap input_info_map( + net_reader->getNetwork().getInputsInfo()); + InferenceEngine::InputInfo::Ptr input_info = input_info_map.begin()->second; + input_info->setPrecision(InferenceEngine::Precision::U8); // It's an image + input_info->setLayout(InferenceEngine::Layout::NCHW); // No idea. + // set output property + InferenceEngine::OutputsDataMap output_info_map( + net_reader->getNetwork().getOutputsInfo()); + auto it = output_info_map.begin(); + InferenceEngine::DataPtr keypoints_output_ptr = (it++)->second; + InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + keypoints_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + keypoints_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + heatmap_output_ptr->setPrecision(InferenceEngine::Precision::FP32); + heatmap_output_ptr->setLayout(InferenceEngine::Layout::NCHW); + // set input and output layer name + input_ = input_info_map.begin()->first; + output_keypoints_ = keypoints_output_ptr->name; + output_heatmap_ = heatmap_output_ptr->name; +} + +void Models::HumanPoseEstimationModel::checkLayerProperty( + const InferenceEngine::CNNNetReader::Ptr& net_reader) +{ + slog::info << "Checking Human Pose Estimation outputs" << slog::endl; + InferenceEngine::OutputsDataMap output_info( + net_reader->getNetwork().getOutputsInfo()); + auto it = output_info.begin(); + InferenceEngine::DataPtr paf_output_ptr = (it++)->second; + InferenceEngine::DataPtr heatmap_output_ptr = (it++)->second; + + if (paf_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + paf_output_ptr->getCreatorLayer().lock()->type); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->type != "Convolution") + { + throw std::logic_error("In Human Pose Estimation network, heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should be a Convolution, but was: " + + heatmap_output_ptr->getCreatorLayer().lock()->type); + } + + if (paf_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(paf_output_ptr->getCreatorLayer().lock()->outData.size())); + } + if (heatmap_output_ptr->getCreatorLayer().lock()->outData.size() != 1) + { + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have 1 output, but had: " + + std::to_string(heatmap_output_ptr->getCreatorLayer().lock()->outData.size())); + } + + if (paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims.size() == 4 && + paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims[2] == 19) + { + std::swap(paf_output_ptr, heatmap_output_ptr); + } + + auto pafDims = paf_output_ptr->getCreatorLayer().lock()->outData[0]->dims; + auto heatDims = heatmap_output_ptr->getCreatorLayer().lock()->outData[0]->dims; + + if (pafDims.size() != 4 || pafDims[0] != 57 || pafDims[1] != 32 + || pafDims[2] != 38 || pafDims[3] != 1) + { + std::ostringstream size; + size << "[ "; + for (size_t s : pafDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, PAF layer (" + + paf_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 38 1 ], but had: " + size.str()); + } + + if (heatDims.size() != 4 || heatDims[0] != 57 || heatDims[1] != 32 + || heatDims[2] != 19 || heatDims[3] != 1) + { + std::ostringstream size; + size << "[ "; + for (size_t s : heatDims) + { + size << s << " "; + } + size << "]"; + throw std::logic_error( + "In Human Pose Estimation network, Heatmap layer (" + + heatmap_output_ptr->getCreatorLayer().lock()->name + + ") should have output size of [ 57 32 19 1 ], but had: " + size.str()); + } + + slog::info << "PAF layer: " + << paf_output_ptr->getCreatorLayer().lock()->name + << slog::endl; + slog::info << "Heatmap layer: " + << heatmap_output_ptr->getCreatorLayer().lock()->name + << slog::endl; +} + +const std::string Models::HumanPoseEstimationModel::getModelName() const +{ + return "Human Pose Estimation"; +} \ No newline at end of file diff --git a/vino_core_lib/src/outputs/image_window_output.cpp b/vino_core_lib/src/outputs/image_window_output.cpp index db74b780..bcd6125d 100644 --- a/vino_core_lib/src/outputs/image_window_output.cpp +++ b/vino_core_lib/src/outputs/image_window_output.cpp @@ -315,6 +315,22 @@ void Outputs::ImageWindowOutput::accept( } } +void Outputs::ImageWindowOutput::accept( + const std::vector & results) +{ + OutputData output; + output.scalar = cv::Scalar(255, 0, 0); + for (unsigned i = 0; i < results.size(); i++) { + cv::Rect result_rect = results[i].getLocation(); + auto score = std::to_string(results[i].getScore()); + + output.rect = result_rect; + output.desc = "Pose " + std::to_string(i) + " [" + score.substr(0, score.find(".") + 2) + "]"; + output.kp = results[i].getKeypoints(); + outputs_.push_back(output); + } +} + void Outputs::ImageWindowOutput::decorateFrame() { if (getPipeline()->getParameters()->isGetFps()) @@ -336,6 +352,17 @@ void Outputs::ImageWindowOutput::decorateFrame() cv::line(frame_, o.hp_cp, o.hp_y, cv::Scalar(0, 255, 0), 2); cv::line(frame_, o.hp_zs, o.hp_ze, cv::Scalar(255, 0, 0), 2); cv::circle(frame_, o.hp_ze, 3, cv::Scalar(255, 0, 0), 2); + + for (auto kp : o.kp) + { + if (kp.x >= 0) + { + std::string score = std::to_string(kp.score).substr(0, 3); + cv::circle(frame_, kp, 3, cv::Scalar(255, 0, 0), cv::FILLED); + cv::putText(frame_, score, kp + cv::Point2f(5, 0), + cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, o.scalar); + } + } } outputs_.clear(); diff --git a/vino_core_lib/src/outputs/ros_service_output.cpp b/vino_core_lib/src/outputs/ros_service_output.cpp index 4014c46e..d878434b 100644 --- a/vino_core_lib/src/outputs/ros_service_output.cpp +++ b/vino_core_lib/src/outputs/ros_service_output.cpp @@ -91,6 +91,16 @@ void Outputs::RosServiceOutput::setServiceResponse( } } +void Outputs::RosServiceOutput::setServiceResponse( + boost::shared_ptr response) +{ + slog::info << "in Human Pose Estimation service::Response ..."; + if (human_pose_msg_ptr_ != nullptr) { + response->humanposes = human_pose_msg_ptr_->humanposes; + } + slog::info << "DONE!" << slog::endl; +} + void Outputs::RosServiceOutput::setServiceResponse( boost::shared_ptr response) { @@ -126,4 +136,5 @@ void Outputs::RosServiceOutput::clearData() headpose_msg_ptr_ = nullptr; segmented_object_msg_ptr_ = nullptr; person_reid_msg_ptr_ = nullptr; + human_pose_msg_ptr_ = nullptr; } diff --git a/vino_core_lib/src/outputs/ros_topic_output.cpp b/vino_core_lib/src/outputs/ros_topic_output.cpp index cbad29fc..3ef55bc3 100644 --- a/vino_core_lib/src/outputs/ros_topic_output.cpp +++ b/vino_core_lib/src/outputs/ros_topic_output.cpp @@ -24,6 +24,7 @@ #include #include #include +#include Outputs::RosTopicOutput::RosTopicOutput() { @@ -41,6 +42,8 @@ Outputs::RosTopicOutput::RosTopicOutput() "/openvino_toolkit/reidentified_persons", 16); pub_segmented_object_ = nh_.advertise( "/openvino_toolkit/segmented_obejcts", 16); + pub_human_pose_ = nh_.advertise( + "/openvino_toolkit/human_poses", 16); emotions_msg_ptr_ = NULL; faces_msg_ptr_ = NULL; @@ -49,6 +52,7 @@ Outputs::RosTopicOutput::RosTopicOutput() object_msg_ptr_ = NULL; person_reid_msg_ptr_ = NULL; segmented_object_msg_ptr_ = NULL; + human_pose_msg_ptr_ = NULL; } void Outputs::RosTopicOutput::feedFrame(const cv::Mat& frame) {} @@ -201,6 +205,43 @@ void Outputs::RosTopicOutput::accept( } } +void Outputs::RosTopicOutput::accept( + const std::vector& results) +{ + human_pose_msg_ptr_ = std::make_shared(); + + for (auto r : results) + { + vino_people_msgs::HumanPose hp; + + auto loc = r.getLocation(); + hp.roi.x_offset = loc.x; + hp.roi.y_offset = loc.y; + hp.roi.width = loc.width; + hp.roi.height = loc.height; + hp.score = r.getScore(); + + auto p = vino_people_msgs::HumanPoseKeypoint();// geometry_msgs::Point(); + p.position.z = -1; + for (auto kp : r.keypoints) + { + p.position.x = kp.x; + p.position.y = kp.y; + if (kp.x >= 0) + { + p.score = kp.score; + } + else + { + p.score = 0; + } + + hp.keypoints.push_back(p); + } + human_pose_msg_ptr_->humanposes.push_back(hp); + } +} + void Outputs::RosTopicOutput::handleOutput() { std_msgs::Header header = getHeader(); @@ -264,7 +305,7 @@ void Outputs::RosTopicOutput::handleOutput() pub_object_.publish(object_msg); object_msg_ptr_ = nullptr; } - if (object_msg_ptr_ != nullptr) + if (object_msg_ptr_ != nullptr) // TODO: verify and remove duplicate code. { object_msgs::ObjectsInBoxes object_msg; object_msg.header = header; @@ -273,6 +314,15 @@ void Outputs::RosTopicOutput::handleOutput() pub_object_.publish(object_msg); object_msg_ptr_ = nullptr; } + if (human_pose_msg_ptr_ != nullptr) + { + vino_people_msgs::HumanPoseStamped human_pose_msg; + human_pose_msg.header = header; + human_pose_msg.humanposes.swap(human_pose_msg_ptr_->humanposes); + + pub_human_pose_.publish(human_pose_msg); + human_pose_msg_ptr_ = nullptr; + } } std_msgs::Header Outputs::RosTopicOutput::getHeader() diff --git a/vino_core_lib/src/outputs/rviz_output.cpp b/vino_core_lib/src/outputs/rviz_output.cpp index dabd929c..e23c38fc 100644 --- a/vino_core_lib/src/outputs/rviz_output.cpp +++ b/vino_core_lib/src/outputs/rviz_output.cpp @@ -71,6 +71,10 @@ void Outputs::RvizOutput::accept(const std::vectoraccept(results); } +void Outputs::RvizOutput::accept(const std::vector & results) +{ + image_window_output_->accept(results); +} void Outputs::RvizOutput::handleOutput() diff --git a/vino_core_lib/src/pipeline_manager.cpp b/vino_core_lib/src/pipeline_manager.cpp index 497e863c..f4228082 100644 --- a/vino_core_lib/src/pipeline_manager.cpp +++ b/vino_core_lib/src/pipeline_manager.cpp @@ -192,14 +192,16 @@ PipelineManager::parseInference( } else if (infer.name == kInferTpye_ObjectDetection) { object = createObjectDetection(infer); - } else if (infer.name == kInferTpye_ObjectSegmentation) { object = createObjectSegmentation(infer); } else if (infer.name == kInferTpye_PersonReidentification) { object = createPersonReidentification(infer); - } + } + else if (infer.name == kInferTpye_HumanPoseEstimation) { + object = createHumanPoseEstimation(infer); + } else { slog::err << "Invalid inference name: " << infer.name << slog::endl; } @@ -325,6 +327,35 @@ PipelineManager::createPersonReidentification( return reidentification_inference_ptr; } +std::shared_ptr +PipelineManager::createHumanPoseEstimation( + const Params::ParamManager::InferenceParams & infer) +{ + // Following createObjectDetection and createFaceDetection + // 1 input, 2 outputs, 1 batch + auto human_estimation_model = + std::make_shared(infer.model, 1, 2, 1); + human_estimation_model->modelInit(); + auto pose_engine = std::make_shared( + plugins_for_devices_[infer.engine], human_estimation_model); + slog::info << "params: " << infer.min_peaks_distance << ", " << + infer.mid_points_score_threshold << ", " << + infer.found_mid_points_ratio_threshold << ", " << + infer.min_joints_number << ", " << + infer.min_subset_score << slog::endl; + auto pose_inference_ptr = + std::make_shared( + infer.min_peaks_distance, + infer.mid_points_score_threshold, + infer.found_mid_points_ratio_threshold, + infer.min_joints_number, + infer.min_subset_score); + pose_inference_ptr->loadNetwork(human_estimation_model); + pose_inference_ptr->loadEngine(pose_engine); + + return pose_inference_ptr; +} + void PipelineManager::threadPipeline(const char* name) { PipelineData& p = pipelines_[name]; while (p.state == PipelineState_ThreadRunning && p.pipeline != nullptr && ros::ok()) { diff --git a/vino_core_lib/src/services/frame_processing_server.cpp b/vino_core_lib/src/services/frame_processing_server.cpp index 2f9675f4..ad5d3ba4 100644 --- a/vino_core_lib/src/services/frame_processing_server.cpp +++ b/vino_core_lib/src/services/frame_processing_server.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -99,4 +100,5 @@ template class FrameProcessingServer; template class FrameProcessingServer; template class FrameProcessingServer; template class FrameProcessingServer; +template class FrameProcessingServer; } // namespace vino_service diff --git a/vino_launch/launch/image_human_pose_server.launch b/vino_launch/launch/image_human_pose_server.launch new file mode 100644 index 00000000..16c191b1 --- /dev/null +++ b/vino_launch/launch/image_human_pose_server.launch @@ -0,0 +1,8 @@ + + + + + + + diff --git a/vino_launch/launch/pipeline_pose.launch b/vino_launch/launch/pipeline_pose.launch new file mode 100644 index 00000000..879e03e8 --- /dev/null +++ b/vino_launch/launch/pipeline_pose.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + diff --git a/vino_launch/param/image_human_pose_server.yaml b/vino_launch/param/image_human_pose_server.yaml new file mode 100644 index 00000000..6ae92aeb --- /dev/null +++ b/vino_launch/param/image_human_pose_server.yaml @@ -0,0 +1,19 @@ +Pipelines: +- name: people + inputs: [Image] + infers: + - name: HumanPoseEstimation + model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml + engine: MYRIAD + label: to/be/set/xxx.labels + batch: 1 + outputs: [RosService] + confidence_threshold: 0.2 + connects: + - left: Image + right: [HumanPoseEstimation] + - left: HumanPoseEstimation + right: [RosService] + input_path: "/opt/openvino_toolkit/ros_openvino_toolkit/data/images/team.jpg" + +OpenvinoCommon: diff --git a/vino_launch/param/pipeline_pose.yaml b/vino_launch/param/pipeline_pose.yaml new file mode 100644 index 00000000..7ecaeccd --- /dev/null +++ b/vino_launch/param/pipeline_pose.yaml @@ -0,0 +1,22 @@ +Pipelines: +- name: people + inputs: [RealSenseCamera] + infers: + - name: HumanPoseEstimation + model: /opt/openvino_toolkit/open_model_zoo/tools/downloader/intel/human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml + engine: MYRIAD + label: to/be/set/xxx.labels + batch: 1 + min_peaks_distance: 3.0 + mid_points_score_threshold: 0.05 + found_mid_points_ratio_threshold: 0.8 + min_joints_number: 3 + min_subset_score: 0.2 + outputs: [ImageWindow, RosTopic, RViz] + connects: + - left: RealSenseCamera + right: [HumanPoseEstimation] + - left: HumanPoseEstimation + right: [ImageWindow, RosTopic, RViz] + +OpenvinoCommon: diff --git a/vino_param_lib/include/vino_param_lib/param_manager.h b/vino_param_lib/include/vino_param_lib/param_manager.h index 1f0a5cc3..4ab13deb 100644 --- a/vino_param_lib/include/vino_param_lib/param_manager.h +++ b/vino_param_lib/include/vino_param_lib/param_manager.h @@ -70,6 +70,13 @@ class ParamManager // singleton int batch; float confidence_threshold = 0.5; bool enable_roi_constraint = false; + + // Human Pose Estimation parameters. + float min_peaks_distance = 3.0; + float mid_points_score_threshold = 0.05; + float found_mid_points_ratio_threshold = 0.8; + int min_joints_number = 3; + float min_subset_score = 0.2; }; struct PipelineParams { diff --git a/vino_param_lib/src/param_manager.cpp b/vino_param_lib/src/param_manager.cpp index 3d5d2bb6..8f362a87 100644 --- a/vino_param_lib/src/param_manager.cpp +++ b/vino_param_lib/src/param_manager.cpp @@ -105,6 +105,13 @@ void operator>>(const YAML::Node& node, ParamManager::InferenceParams& infer) YAML_PARSE(node, "batch", infer.batch) YAML_PARSE(node, "confidence_threshold", infer.confidence_threshold) YAML_PARSE(node, "enable_roi_constraint", infer.enable_roi_constraint) + + // Human Pose Estimation parameters + YAML_PARSE(node, "min_peaks_distance", infer.min_peaks_distance) + YAML_PARSE(node, "mid_points_score_threshold", infer.mid_points_score_threshold) + YAML_PARSE(node, "found_mid_points_ratio_threshold", infer.found_mid_points_ratio_threshold) + YAML_PARSE(node, "min_joints_number", infer.min_joints_number) + YAML_PARSE(node, "min_subset_score", infer.min_subset_score) slog::info << "Inference Params:name=" << infer.name << slog::endl; } @@ -184,6 +191,11 @@ void ParamManager::print() const slog::info << "\t\tBatch: " << infer.batch << slog::endl; slog::info << "\t\tConfidence_threshold: " << infer.confidence_threshold << slog::endl; slog::info << "\t\tEnable_roi_constraint: " << infer.enable_roi_constraint << slog::endl; + slog::info << "\t\tMin_peaks_distance: " << infer.min_peaks_distance << slog::endl; + slog::info << "\t\tMid_points_score_threshold: " << infer.mid_points_score_threshold << slog::endl; + slog::info << "\t\tFound_mid_points_ratio_threshold: " << infer.found_mid_points_ratio_threshold << slog::endl; + slog::info << "\t\tMin_joints_number: " << infer.min_joints_number << slog::endl; + slog::info << "\t\tMin_subset_score: " << infer.min_subset_score << slog::endl; } slog::info << "\tConnections: " << slog::endl; diff --git a/vino_people_msgs/CMakeLists.txt b/vino_people_msgs/CMakeLists.txt index 8ea715de..42d40f5d 100644 --- a/vino_people_msgs/CMakeLists.txt +++ b/vino_people_msgs/CMakeLists.txt @@ -36,6 +36,9 @@ add_message_files(DIRECTORY msg FILES ObjectsInMasks.msg Reidentification.msg ReidentificationStamped.msg + HumanPoseKeypoint.msg + HumanPose.msg + HumanPoseStamped.msg ) add_service_files(FILES @@ -45,6 +48,7 @@ add_service_files(FILES PeopleSrv.srv ReidentificationSrv.srv ObjectsInMasksSrv.srv + HumanPoseSrv.srv ) generate_messages(DEPENDENCIES std_msgs diff --git a/vino_people_msgs/msg/HumanPose.msg b/vino_people_msgs/msg/HumanPose.msg new file mode 100644 index 00000000..cf5fc2ce --- /dev/null +++ b/vino_people_msgs/msg/HumanPose.msg @@ -0,0 +1,17 @@ +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vino_people_msgs/HumanPoseKeypoint[] keypoints # Keypoints +float32 score # score +sensor_msgs/RegionOfInterest roi # region of interest for a person's body \ No newline at end of file diff --git a/vino_people_msgs/msg/HumanPoseKeypoint.msg b/vino_people_msgs/msg/HumanPoseKeypoint.msg new file mode 100644 index 00000000..a13caa02 --- /dev/null +++ b/vino_people_msgs/msg/HumanPoseKeypoint.msg @@ -0,0 +1,2 @@ +float64 score +geometry_msgs/Point position \ No newline at end of file diff --git a/vino_people_msgs/msg/HumanPoseStamped.msg b/vino_people_msgs/msg/HumanPoseStamped.msg new file mode 100644 index 00000000..8a93b1d7 --- /dev/null +++ b/vino_people_msgs/msg/HumanPoseStamped.msg @@ -0,0 +1,16 @@ +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +std_msgs/Header header +vino_people_msgs/HumanPose[] humanposes diff --git a/vino_people_msgs/srv/HumanPoseSrv.srv b/vino_people_msgs/srv/HumanPoseSrv.srv new file mode 100644 index 00000000..2e468088 --- /dev/null +++ b/vino_people_msgs/srv/HumanPoseSrv.srv @@ -0,0 +1,17 @@ +# Copyright (c) 2017 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +string image_path # input: an image +--- +HumanPose[] humanposes # ObjectInBox array diff --git a/vino_sample/CMakeLists.txt b/vino_sample/CMakeLists.txt index f2cbe305..259a0ebe 100644 --- a/vino_sample/CMakeLists.txt +++ b/vino_sample/CMakeLists.txt @@ -272,6 +272,43 @@ target_link_libraries(image_reidentification_server ${OpenCV_LIBRARIES} ) +add_executable(image_human_pose_client + src/image_human_pose_client.cpp +) + +add_dependencies(image_human_pose_client + ${${PROJECT_NAME}_EXPORTED_TARGETS} + ${catkin_EXPORTED_TARGETS} + ${vino_core_lib_TARGETS} +) + +target_link_libraries(image_human_pose_client + ${catkin_LIBRARIES} + cpu_extension + gflags + ${vino_param_lib_LIBRARIES} + ${InferenceEngine_LIBRARIES} + ${OpenCV_LIBRARIES} +) + +add_executable(image_human_pose_server + src/image_human_pose_server.cpp +) + +add_dependencies(image_human_pose_server + ${${PROJECT_NAME}_EXPORTED_TARGETS} + ${catkin_EXPORTED_TARGETS} + ${vino_core_lib_TARGETS} +) + +target_link_libraries(image_human_pose_server + ${catkin_LIBRARIES} + cpu_extension + gflags + ${vino_param_lib_LIBRARIES} + ${InferenceEngine_LIBRARIES} + ${OpenCV_LIBRARIES} +) if(UNIX OR APPLE) # Linker flags. diff --git a/vino_sample/src/image_human_pose_client.cpp b/vino_sample/src/image_human_pose_client.cpp new file mode 100644 index 00000000..dc2f6c88 --- /dev/null +++ b/vino_sample/src/image_human_pose_client.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +* \brief A sample for this library. This sample performs human pose estimation. +* \file src/image_human_pose_client.cpp +*/ + +#include +#include +#include "opencv2/opencv.hpp" + +#include + + + +int main(int argc, char ** argv) +{ + ros::init(argc, argv, "image_human_pose_client"); + + ros::NodeHandle n; + + ros::ServiceClient client = n.serviceClient("/openvino_toolkit/service"); + + + vino_people_msgs::HumanPoseSrv srv; + + if (client.call(srv)) + { + ROS_INFO("Request service success!"); + + for (unsigned int i = 0; i < srv.response.humanposes.size(); i++) { + std::ostringstream os; + os << "Pose " << i << ", keypoints (" << srv.response.humanposes[i].keypoints.size() << "):"; + + for (auto kp : srv.response.humanposes[i].keypoints) + { + os << std::endl<< "\tx: " << kp.position.x << "\ty: " << kp.position.y << "\tscore: " << kp.score; + } + ROS_INFO(os.str().c_str()); + } + + + } + else + { + ROS_ERROR("Failed to request service \"openvino_toolkit/service\" "); return -1; + } + + + + +} + + + diff --git a/vino_sample/src/image_human_pose_server.cpp b/vino_sample/src/image_human_pose_server.cpp new file mode 100644 index 00000000..f5f9d764 --- /dev/null +++ b/vino_sample/src/image_human_pose_server.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +* \brief A sample for this library. This sample performs human pose estimation. +* \file src/image_human_pose_server.cpp +*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "vino_core_lib/common.h" +#include "vino_core_lib/engines/engine.h" +#include "vino_core_lib/factory.h" +#include "vino_core_lib/inferences/age_gender_detection.h" +#include "vino_core_lib/inferences/base_inference.h" +#include "vino_core_lib/inferences/emotions_detection.h" +#include "vino_core_lib/inferences/face_detection.h" +#include "vino_core_lib/inferences/head_pose_detection.h" +#include "vino_core_lib/inferences/human_pose_estimation.h" +#include "vino_core_lib/inputs/realsense_camera_topic.h" +#include "vino_core_lib/outputs/image_window_output.h" +#include "vino_core_lib/outputs/ros_topic_output.h" +#include "vino_core_lib/outputs/rviz_output.h" +#include "vino_core_lib/pipeline.h" +#include "vino_core_lib/pipeline_manager.h" +#include "vino_core_lib/slog.h" +#include "inference_engine.hpp" +#include "opencv2/opencv.hpp" +#include "vino_sample/utility.hpp" +#include "vino_people_msgs/HumanPoseSrv.h" + +bool parseAndCheckCommandLine(int argc, char** argv) +{ + // -----Parsing and validation of input args--------------------------- + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_h) + { + showUsageForParam(); + return false; + } + + return true; +} + +int main(int argc, char** argv) +{ + ros::init(argc, argv, "human_pose_server"); + + if (!parseAndCheckCommandLine(argc, argv)) return 0; + + ros::param::param("~param_file", FLAGS_config, "/param/pipeline_pose.yaml"); + + slog::info << "FLAGS_config=" << FLAGS_config << slog::endl; + + std::string service_name = "/openvino_toolkit/service"; + slog::info << "service name = " << service_name << slog::endl; + // ----- Parsing and validation of input args----------------------- + + + auto node = std::make_shared>(service_name, FLAGS_config); + + slog::info << "Waiting for service request..." << slog::endl; + ros::spin(); + slog::info << "--------------End of Execution--------------" << FLAGS_config << slog::endl; + +}