opencv · alalek · Dec 12, 2021 · Jun 11, 2019 · Jun 11, 2019 · Aug 4, 2021
diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp
@@ -231,7 +231,7 @@ CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const Ptr<EncoderCall
 want to work with raw video stream.
 @param frameSize Size of the input video frames.
 @param fps Framerate of the created video stream.
-@param params Encoder parameters. See cudacodec::EncoderParams .
+@param params Encoder parameters. See cudacodec::EncoderParams.
 @param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
 SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
 encoding, frames with other formats will be used as is.
@@ -265,7 +265,7 @@ enum Codec
     Uncompressed_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    //!< UYVY (4:2:2)
 };
 
-/** @brief Chroma formats supported by cudacodec::VideoReader .
+/** @brief Chroma formats supported by cudacodec::VideoReader.
  */
 enum ChromaFormat
 {
@@ -276,17 +276,47 @@ enum ChromaFormat
     NumFormats
 };
 
+/** @brief Deinterlacing mode used by decoder.
+* @param Weave Weave both fields (no deinterlacing). For progressive content and for content that doesn't need deinterlacing.
+* @param Bob Drop one field.
+* @param Adaptive Adaptive deinterlacing needs more video memory than other deinterlacing modes.
+* */
+enum DeinterlaceMode
+{
+    Weave = 0,
+    Bob = 1,
+    Adaptive = 2
+};
+
 /** @brief Struct providing information about video file format. :
  */
 struct FormatInfo
 {
     Codec codec;
     ChromaFormat chromaFormat;
     int nBitDepthMinus8 = -1;
-    int width = 0;//!< Width of the decoded frame returned by nextFrame(frame)
-    int height = 0;//!< Height of the decoded frame returned by nextFrame(frame)
+    int ulWidth = 0;//!< Coded sequence width in pixels.
+    int ulHeight = 0;//!< Coded sequence height in pixels.
+    int width = 0;//!< Width of the decoded frame returned by nextFrame(frame).
+    int height = 0;//!< Height of the decoded frame returned by nextFrame(frame).
+    int ulMaxWidth = 0;
+    int ulMaxHeight = 0;
     Rect displayArea;//!< ROI inside the decoded frame returned by nextFrame(frame), containing the useable video frame.
     bool valid = false;
+    double fps = 0;//!< Frame rate reported by the video source, 0 if none was reported.
+    int ulNumDecodeSurfaces = 0;//!< Maximum number of internal decode surfaces.
+    DeinterlaceMode deinterlaceMode = Weave;//!< Deinterlacing mode used by the decoder; defaults to Weave so a source that never sets it is not read uninitialized.
+};
+
+/** @brief cv::cudacodec::VideoReader generic property identifiers.
+*/
+enum class VideoReaderProps {
+    PROP_DECODED_FRAME_IDX = 0, //!< Index for retrieving the decoded frame using retrieve().
+    PROP_EXTRA_DATA_INDEX = 1, //!< Index for retrieving the extra data associated with a video source using retrieve().
+    PROP_RAW_PACKAGES_BASE_INDEX = 2, //!< Base index for retrieving raw encoded data using retrieve().
+    PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB = 3, //!< Number of raw packages received since the last call to grab().
+    PROP_RAW_MODE = 4, //!< Status of raw mode.
+    PROP_LRF_HAS_KEY_FRAME = 5 //!< FFmpeg source only - Indicates whether the Last Raw Frame (LRF), output from VideoReader::retrieve() when VideoReader is initialized in raw mode, contains encoded data for a key frame.
 };
 
 /** @brief Video reader interface.
@@ -310,6 +340,48 @@ class CV_EXPORTS_W VideoReader
     /** @brief Returns information about video file format.
     */
     virtual FormatInfo format() const = 0;
+
+    /** @brief Grabs the next frame from the video source.
+
+    @return `true` (non-zero) in the case of success.
+
+    The method/function grabs the next frame from video file or camera and returns true (non-zero) in
+    the case of success.
+
+    The primary use of the function is for reading both the encoded and decoded video data when rawMode is enabled. With rawMode enabled,
+    retrieve() can be called following grab() to retrieve all the data associated with the current video source since the last call to grab() or the creation of the VideoReader.
+     */
+    CV_WRAP virtual bool grab(Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Returns previously grabbed video data.
+
+    @param [out] frame The returned data, which depends on the provided idx. If there is no new data since the last call to grab() the frame will be empty.
+    @param idx Determines the returned data inside frame. The returned data can be the:
+    Decoded frame, idx = get(VideoReaderProps::PROP_DECODED_FRAME_IDX).
+    Extra data if available, idx = get(VideoReaderProps::PROP_EXTRA_DATA_INDEX).
+    Raw encoded data package. To retrieve package i, idx = get(VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX) + i with i < get(VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB).
+    @return `false` if no frames have been grabbed.
+
+    The method returns data associated with the current video source since the last call to grab() or the creation of the VideoReader. If no data is present
+    the method returns false and frame is left empty.
+     */
+    CV_WRAP virtual bool retrieve(CV_OUT OutputArray frame, const size_t idx = static_cast<size_t>(VideoReaderProps::PROP_DECODED_FRAME_IDX)) const = 0;
+
+    /** @brief Sets a property in the VideoReader.
+
+    @param property Property identifier from cv::cudacodec::VideoReaderProps (e.g. cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX, cv::cudacodec::VideoReaderProps::PROP_EXTRA_DATA_INDEX, ...).
+    @param propertyVal Value of the property.
+    @return `true` if the property has been set.
+     */
+    CV_WRAP virtual bool set(const VideoReaderProps property, const double propertyVal) = 0;
+
+    /** @brief Returns the specified VideoReader property.
+
+    @param property Property identifier from cv::cudacodec::VideoReaderProps (e.g. cv::cudacodec::VideoReaderProps::PROP_DECODED_FRAME_IDX, cv::cudacodec::VideoReaderProps::PROP_EXTRA_DATA_INDEX, ...).
+    @param propertyVal Optional value for the property.
+    @return Value for the specified property. Value -1 is returned when querying a property that is not supported.
+    */
+    CV_WRAP virtual int get(const VideoReaderProps property, const int propertyVal = -1) const = 0;
 };
 
 /** @brief Interface for video demultiplexing. :
@@ -328,26 +400,39 @@ class CV_EXPORTS_W RawVideoSource
      */
     virtual bool getNextPacket(unsigned char** data, size_t* size) = 0;
 
+    /** @brief Returns true if the last packet contained a key frame. The default implementation always returns false.
+     */
+    virtual bool lastPacketContainsKeyFrame() const { return false; }
+
     /** @brief Returns information about video file format.
     */
     virtual FormatInfo format() const = 0;
 
     /** @brief Updates the coded width and height inside format.
     */
-    virtual void updateFormat(const int codedWidth, const int codedHeight) = 0;
+    virtual void updateFormat(const FormatInfo& videoFormat) = 0; // now receives a complete FormatInfo rather than only the coded width and height
+
+    /** @brief Returns any extra data associated with the video source.
+
+    @param [out] extraData 1D cv::Mat that receives the extra data if it exists.
+     */
+    virtual void getExtraData(cv::Mat& extraData) const = 0;
 };
 
 /** @brief Creates video reader.
 
 @param filename Name of the input video file.
+@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve() with an index starting at VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX.
 
 FFMPEG is used to read videos. User can implement own demultiplexing with cudacodec::RawVideoSource
  */
-CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename);
+CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename, const bool rawMode = false);
+
 /** @overload
 @param source RAW video source implemented by user.
+@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve() with an index starting at VideoReaderProps::PROP_RAW_PACKAGES_BASE_INDEX.
 */
-CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source);
+CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode = false);
 
 //! @}
 

diff --git a/modules/cudacodec/src/cuvid_video_source.cpp b/modules/cudacodec/src/cuvid_video_source.cpp
@@ -76,6 +76,8 @@ cv::cudacodec::detail::CuvidVideoSource::CuvidVideoSource(const String& fname)
     format_.height = vidfmt.coded_height;
     format_.displayArea = Rect(Point(vidfmt.display_area.left, vidfmt.display_area.top), Point(vidfmt.display_area.right, vidfmt.display_area.bottom));
     format_.valid = true;
+    if (vidfmt.frame_rate.numerator != 0 && vidfmt.frame_rate.denominator != 0)
+        format_.fps = vidfmt.frame_rate.numerator / (double)vidfmt.frame_rate.denominator;
 }
 
 cv::cudacodec::detail::CuvidVideoSource::~CuvidVideoSource()
@@ -88,10 +90,9 @@ FormatInfo cv::cudacodec::detail::CuvidVideoSource::format() const
     return format_;
 }
 
-void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const int codedWidth, const int codedHeight)
+void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const FormatInfo& videoFormat)
 {
-    format_.width = codedWidth;
-    format_.height = codedHeight;
+    format_ = videoFormat; // replace the whole stored format; valid is then forced to true regardless of videoFormat.valid
     format_.valid = true;
 }
 
@@ -119,7 +120,7 @@ int CUDAAPI cv::cudacodec::detail::CuvidVideoSource::HandleVideoData(void* userD
 {
     CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
 
-    return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
+    return thiz->parseVideoData(packet->payload, packet->payload_size, thiz->RawModeEnabled(), false, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
 }
 
 #endif // HAVE_NVCUVID
diff --git a/modules/cudacodec/src/cuvid_video_source.hpp b/modules/cudacodec/src/cuvid_video_source.hpp
@@ -55,7 +55,7 @@ class CuvidVideoSource : public VideoSource
     ~CuvidVideoSource();
 
     FormatInfo format() const CV_OVERRIDE;
-    void updateFormat(const int codedWidth, const int codedHeight);
+    void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
     void start() CV_OVERRIDE;
     void stop() CV_OVERRIDE;
     bool isStarted() const CV_OVERRIDE;

diff --git a/modules/cudacodec/src/ffmpeg_video_source.cpp b/modules/cudacodec/src/ffmpeg_video_source.cpp
@@ -75,6 +75,7 @@ Codec FourccToCodec(int codec)
     case CV_FOURCC_MACRO('M', 'P', 'G', '1'): return MPEG1;
     case CV_FOURCC_MACRO('M', 'P', 'G', '2'): return MPEG2;
     case CV_FOURCC_MACRO('X', 'V', 'I', 'D'): // fallthru
+    case CV_FOURCC_MACRO('m', 'p', '4', 'v'): // fallthru
     case CV_FOURCC_MACRO('D', 'I', 'V', 'X'): return MPEG4;
     case CV_FOURCC_MACRO('W', 'V', 'C', '1'): return VC1;
     case CV_FOURCC_MACRO('H', '2', '6', '4'): // fallthru
@@ -112,6 +113,22 @@ void FourccToChromaFormat(const int pixelFormat, ChromaFormat &chromaFormat, int
     }
 }
 
+int StartCodeLen(unsigned char* data, const int sz) { // length of the start code at the front of data (3 or 4), or 0 when data does not begin with one
+    if (sz >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1) // bytes 00 00 01
+        return 3;
+    else if (sz >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1) // bytes 00 00 00 01
+        return 4;
+    else
+        return 0;
+}
+
+bool ParamSetsExist(unsigned char* parameterSets, const int szParameterSets, unsigned char* data, const int szData) { // true when both buffers start with a start code followed by the same byte
+    const int paramSetStartCodeLen = StartCodeLen(parameterSets, szParameterSets);
+    const int packetStartCodeLen = StartCodeLen(data, szData);
+    // weak test to see if the parameter set has already been included in the RTP stream; the size checks keep the byte after each start code inside its buffer
+    return paramSetStartCodeLen != 0 && packetStartCodeLen != 0 && szParameterSets > paramSetStartCodeLen && szData > packetStartCodeLen && parameterSets[paramSetStartCodeLen] == data[packetStartCodeLen];
+}
+
 cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
 {
     if (!videoio_registry::hasBackend(CAP_FFMPEG))
@@ -125,6 +142,11 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
         CV_Error(Error::StsUnsupportedFormat, "Fetching of RAW video streams is not supported");
     CV_Assert(cap.get(CAP_PROP_FORMAT) == -1);
 
+    const int codecExtradataIndex = static_cast<int>(cap.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
+    Mat tmpExtraData;
+    if (cap.retrieve(tmpExtraData, codecExtradataIndex) && tmpExtraData.total())
+        extraData = tmpExtraData.clone();
+
     int codec = (int)cap.get(CAP_PROP_FOURCC);
     int pixelFormat = (int)cap.get(CAP_PROP_CODEC_PIXEL_FORMAT);
 
@@ -133,6 +155,7 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
     format_.width = cap.get(CAP_PROP_FRAME_WIDTH);
     format_.displayArea = Rect(0, 0, format_.width, format_.height);
     format_.valid = false;
+    format_.fps = cap.get(CAP_PROP_FPS);
     FourccToChromaFormat(pixelFormat, format_.chromaFormat, format_.nBitDepthMinus8);
 }
 
@@ -147,10 +170,9 @@ FormatInfo cv::cudacodec::detail::FFmpegVideoSource::format() const
     return format_;
 }
 
-void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const int codedWidth, const int codedHeight)
+void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const FormatInfo& videoFormat)
 {
-    format_.width = codedWidth;
-    format_.height = codedHeight;
+    format_ = videoFormat; // replace the whole stored format; valid is then forced to true regardless of videoFormat.valid
     format_.valid = true;
 }
 
@@ -159,7 +181,26 @@ bool cv::cudacodec::detail::FFmpegVideoSource::getNextPacket(unsigned char** dat
     cap >> rawFrame;
     *data = rawFrame.data;
     *size = rawFrame.total();
+    if (iFrame++ == 0 && extraData.total()) {
+        if (format_.codec == Codec::MPEG4 ||
+            ((format_.codec == Codec::H264 || format_.codec == Codec::HEVC) && !ParamSetsExist(extraData.data, extraData.total(), *data, *size)))
+        {
+            const size_t nBytesToTrimFromData = format_.codec == Codec::MPEG4 ? 3 : 0;
+            const size_t newSz = extraData.total() + *size - nBytesToTrimFromData;
+            dataWithHeader = Mat(1, newSz, CV_8UC1);
+            memcpy(dataWithHeader.data, extraData.data, extraData.total());
+            memcpy(dataWithHeader.data + extraData.total(), (*data) + nBytesToTrimFromData, *size - nBytesToTrimFromData);
+            *data = dataWithHeader.data;
+            *size = newSz;
+        }
+    }
+
     return *size != 0;
 }
 
+bool cv::cudacodec::detail::FFmpegVideoSource::lastPacketContainsKeyFrame() const
+{
+    return cap.get(CAP_PROP_LRF_HAS_KEY_FRAME); // the property comes back as a double and converts to bool: any non-zero value means true
+}
+
 #endif // HAVE_CUDA
diff --git a/modules/cudacodec/src/ffmpeg_video_source.hpp b/modules/cudacodec/src/ffmpeg_video_source.hpp
@@ -56,15 +56,19 @@ class FFmpegVideoSource : public RawVideoSource
 
     bool getNextPacket(unsigned char** data, size_t* size) CV_OVERRIDE;
 
+    bool lastPacketContainsKeyFrame() const CV_OVERRIDE; // overrides RawVideoSource; reports the capture's CAP_PROP_LRF_HAS_KEY_FRAME value
+
     FormatInfo format() const CV_OVERRIDE;
 
-    void updateFormat(const int codedWidth, const int codedHeight);
+    void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
 
+    void getExtraData(cv::Mat& _extraData) const CV_OVERRIDE { _extraData = extraData; } // extraData is filled in the constructor and stays empty when the capture provides none
 
 private:
     FormatInfo format_;
     VideoCapture cap;
-    Mat rawFrame;
+    Mat rawFrame, extraData, dataWithHeader;
+    int iFrame = 0;
 };
 
 }}}

diff --git a/modules/cudacodec/src/frame_queue.cpp b/modules/cudacodec/src/frame_queue.cpp
@@ -45,13 +45,22 @@
 
 #ifdef HAVE_NVCUVID
 
-cv::cudacodec::detail::FrameQueue::FrameQueue() :
-    endOfDecode_(0),
-    framesInQueue_(0),
-    readPosition_(0)
-{
-    std::memset(displayQueue_, 0, sizeof(displayQueue_));
-    std::memset((void*) isFrameInUse_, 0, sizeof(isFrameInUse_));
+RawPacket::RawPacket(const unsigned char* _data, const size_t _size, const bool _containsKeyFrame) : size(_size), containsKeyFrame(_containsKeyFrame) { // copies _size bytes of _data into a buffer owned by this packet
+    data = cv::Ptr<unsigned char*>(new unsigned char*(new unsigned char[size]), [](unsigned char** p) { delete[] *p; delete p; }); // custom deleter frees the byte array too; makePtr's default deleter released only the pointer slot and leaked the buffer
+    memcpy(*data, _data, size);
+}
+
+cv::cudacodec::detail::FrameQueue::~FrameQueue() { // frees the in-use flag array that init() allocates with new[]
+    if (isFrameInUse_)
+        delete[] isFrameInUse_;
+}
+
+void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) { // sizes the display queue and the in-use flags for _maxSz entries, under mtx_
+    AutoLock autoLock(mtx_);
+    maxSz = _maxSz;
+    displayQueue_ = std::vector<CUVIDPARSERDISPINFO>(maxSz, CUVIDPARSERDISPINFO()); // maxSz value-initialized entries
+    isFrameInUse_ = new volatile int[maxSz]; // NOTE(review): an array from an earlier init() is not freed here, so a second call would leak it - confirm init() runs once per queue
+    std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz); // clear every in-use flag
 }
 
 bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
@@ -68,7 +77,7 @@ bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex
     return true;
 }
 
-void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
+void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket> rawPackets)
 {
     // Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed
     // for display
@@ -82,10 +91,12 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
         {
             AutoLock autoLock(mtx_);
 
-            if (framesInQueue_ < MaximumSize)
+            if (framesInQueue_ < maxSz)
             {
-                int writePosition = (readPosition_ + framesInQueue_) % MaximumSize;
-                displayQueue_[writePosition] = *picParams;
+                const int writePosition = (readPosition_ + framesInQueue_) % maxSz;
+                displayQueue_.at(writePosition) = *picParams;
+                for (const auto& rawPacket : rawPackets)
+                    rawPacketQueue.push(rawPacket);
                 framesInQueue_++;
                 isFramePlaced = true;
             }
@@ -99,15 +110,19 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
     } while (!isEndOfDecode());
 }
 
-bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
+bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets)
 {
     AutoLock autoLock(mtx_);
 
     if (framesInQueue_ > 0)
     {
         int entry = readPosition_;
-        displayInfo = displayQueue_[entry];
-        readPosition_ = (entry + 1) % MaximumSize;
+        displayInfo = displayQueue_.at(entry);
+        while (!rawPacketQueue.empty()) {
+            rawPackets.push_back(rawPacketQueue.front());
+            rawPacketQueue.pop();
+        }
+        readPosition_ = (entry + 1) % maxSz;
         framesInQueue_--;
         return true;
     }