Skip to content

Commit 1cecd2c

Browse files
authored
Merge pull request #3098 from cudawarped:rtsp_video_write
Add writing encoded video streams to disk when streaming from RTSP sources using cudacodec * Add missing codecs to cudacodec which uses Nvidia Video Codec SDK including checks to ensure codec used in input video file is supported on the current device. * Update cudacodec to 1) automatically write the raw encoded video stream to a video file and, 2) use less GPU memory by more closely mirroring the Nvidia samples. Specifically querying the decoder for the number of decode surfaces (h265 commonly uses 4) instead of always using 20 and not using adaptive deinterlacing when the video sequence is progressive. Additional updates to mirror the Nvidia sample include initializing the decoder so that HandleVideoSequence() gets called every time before the decoder is initialized, ensuring all the parameters for the decoder are provided by nvcudec. Added facility to decode AV1, not tested as VideoCapture doesn't return a valid fourcc for this. Add facility to decode MPEG4 video - requires modification to VideoCapture see pull request. * Prevent adding parameter sets twice and add zero padding to output files so that they play in VLC. Notes: VideoCapture - returns mpeg as the codec for mpeg4 files, so files written as .m4v from mpeg4 sources cannot currently be decoded. This is also true for AV1 sources where cap.get(CAP_PROP_FOURCC) returns 0. Added mpeg4 test file which can be decoded when VideoCapture adds the extra_data. * Update to account for the extraData being passed from cap.retrieve instead of appended to the first packet. * Update to be compatible with changes to VideoCapture * Remove redundant test. * Add check to ensure retrieve is successful. * Remove writeToFile and allow VideoReader to return raw encoded data at the same time as decoded frames. * Fix missing documentation.
1 parent 8bd6316 commit 1cecd2c

15 files changed

+595
-137
lines changed

modules/cudacodec/include/opencv2/cudacodec.hpp

Lines changed: 92 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ CV_EXPORTS_W Ptr<cudacodec::VideoWriter> createVideoWriter(const Ptr<EncoderCall
231231
want to work with raw video stream.
232232
@param frameSize Size of the input video frames.
233233
@param fps Framerate of the created video stream.
234-
@param params Encoder parameters. See cudacodec::EncoderParams .
234+
@param params Encoder parameters. See cudacodec::EncoderParams.
235235
@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 ,
236236
SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before
237237
encoding, frames with other formats will be used as is.
@@ -265,7 +265,7 @@ enum Codec
265265
Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) //!< UYVY (4:2:2)
266266
};
267267

268-
/** @brief Chroma formats supported by cudacodec::VideoReader .
268+
/** @brief Chroma formats supported by cudacodec::VideoReader.
269269
*/
270270
enum ChromaFormat
271271
{
@@ -276,17 +276,47 @@ enum ChromaFormat
276276
NumFormats
277277
};
278278

279+
/** @brief Deinterlacing mode used by decoder.
280+
* @param Weave Weave both fields (no deinterlacing). For progressive content and for content that doesn't need deinterlacing.
281+
* @param Bob Drop one field.
282+
* @param Adaptive Adaptive deinterlacing needs more video memory than other deinterlacing modes.
283+
* */
284+
enum DeinterlaceMode
285+
{
286+
Weave = 0,
287+
Bob = 1,
288+
Adaptive = 2
289+
};
290+
279291
/** @brief Struct providing information about video file format. :
280292
*/
281293
struct FormatInfo
282294
{
283295
Codec codec;
284296
ChromaFormat chromaFormat;
285297
int nBitDepthMinus8 = -1;
286-
int width = 0;//!< Width of the decoded frame returned by nextFrame(frame)
287-
int height = 0;//!< Height of the decoded frame returned by nextFrame(frame)
298+
int ulWidth = 0;//!< Coded sequence width in pixels.
299+
int ulHeight = 0;//!< Coded sequence height in pixels.
300+
int width = 0;//!< Width of the decoded frame returned by nextFrame(frame).
301+
int height = 0;//!< Height of the decoded frame returned by nextFrame(frame).
302+
int ulMaxWidth = 0;
303+
int ulMaxHeight = 0;
288304
Rect displayArea;//!< ROI inside the decoded frame returned by nextFrame(frame), containing the useable video frame.
289305
bool valid = false;
306+
double fps = 0;
307+
int ulNumDecodeSurfaces = 0;//!< Maximum number of internal decode surfaces.
308+
DeinterlaceMode deinterlaceMode;
309+
};
310+
311+
/** @brief cv::cudacodec::VideoReader generic properties identifier.
312+
*/
313+
enum class VideoReaderProps {
314+
PROP_DECODED_FRAME_IDX = 0, //!< Index for retrieving the decoded frame using retrieve().
315+
PROP_EXTRA_DATA_INDEX = 1, //!< Index for retrieving the extra data associated with a video source using retrieve().
316+
PROP_RAW_PACKAGES_BASE_INDEX = 2, //!< Base index for retrieving raw encoded data using retrieve().
317+
PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB = 3, //!< Number of raw packages received since the last call to grab().
318+
PROP_RAW_MODE = 4, //!< Status of raw mode.
319+
PROP_LRF_HAS_KEY_FRAME = 5 //!< FFmpeg source only - Indicates whether the Last Raw Frame (LRF), output from VideoReader::retrieve() when VideoReader is initialized in raw mode, contains encoded data for a key frame.
290320
};
291321

292322
/** @brief Video reader interface.
@@ -310,6 +340,48 @@ class CV_EXPORTS_W VideoReader
310340
/** @brief Returns information about video file format.
311341
*/
312342
virtual FormatInfo format() const = 0;
343+
344+
/** @brief Grabs the next frame from the video source.
345+
346+
@return `true` (non-zero) in the case of success.
347+
348+
The method/function grabs the next frame from video file or camera and returns true (non-zero) in
349+
the case of success.
350+
351+
The primary use of the function is for reading both the encoded and decoded video data when rawMode is enabled. With rawMode enabled
352+
retrieve() can be called following grab() to retrieve all the data associated with the current video source since the last call to grab() or the creation of the VideoReader.
353+
*/
354+
CV_WRAP virtual bool grab(Stream& stream = Stream::Null()) = 0;
355+
356+
/** @brief Returns previously grabbed video data.
357+
358+
@param [out] frame The returned data which depends on the provided idx. If there is no new data since the last call to grab() the image will be empty.
359+
@param idx Determines the returned data inside image. The returned data can be the:
360+
Decoded frame, idx = get(PROP_DECODED_FRAME_IDX).
361+
Extra data if available, idx = get(PROP_EXTRA_DATA_INDEX).
362+
Raw encoded data package. To retrieve package i, idx = get(PROP_RAW_PACKAGES_BASE_INDEX) + i with i < get(PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB)
363+
@return `false` if no frames have been grabbed
364+
365+
The method returns data associated with the current video source since the last call to grab() or the creation of the VideoReader. If no data is present
366+
the method returns false and the function returns an empty image.
367+
*/
368+
CV_WRAP virtual bool retrieve(CV_OUT OutputArray frame, const size_t idx = static_cast<size_t>(VideoReaderProps::PROP_DECODED_FRAME_IDX)) const = 0;
369+
370+
/** @brief Sets a property in the VideoReader.
371+
372+
@param property Property identifier from cv::cudacodec::VideoReaderProps (eg. cv::cudacodec::PROP_DECODED_FRAME_IDX, cv::cudacodec::PROP_EXTRA_DATA_INDEX, ...)
373+
@param propertyVal Value of the property.
374+
@return `true` if the property has been set.
375+
*/
376+
CV_WRAP virtual bool set(const VideoReaderProps property, const double propertyVal) = 0;
377+
378+
/** @brief Returns the specified VideoReader property
379+
380+
@param property Property identifier from cv::cudacodec::VideoReaderProps (eg. cv::cudacodec::PROP_DECODED_FRAME_IDX, cv::cudacodec::PROP_EXTRA_DATA_INDEX, ...)
381+
@param propertyVal Optional value for the property.
382+
@return Value for the specified property. Value -1 is returned when querying a property that is not supported.
383+
*/
384+
CV_WRAP virtual int get(const VideoReaderProps property, const int propertyVal = -1) const = 0;
313385
};
314386

315387
/** @brief Interface for video demultiplexing. :
@@ -328,26 +400,39 @@ class CV_EXPORTS_W RawVideoSource
328400
*/
329401
virtual bool getNextPacket(unsigned char** data, size_t* size) = 0;
330402

403+
/** @brief Returns true if the last packet contained a key frame.
404+
*/
405+
virtual bool lastPacketContainsKeyFrame() const { return false; }
406+
331407
/** @brief Returns information about video file format.
332408
*/
333409
virtual FormatInfo format() const = 0;
334410

335411
/** @brief Updates the coded width and height inside format.
336412
*/
337-
virtual void updateFormat(const int codedWidth, const int codedHeight) = 0;
413+
virtual void updateFormat(const FormatInfo& videoFormat) = 0;
414+
415+
/** @brief Returns any extra data associated with the video source.
416+
417+
@param extraData 1D cv::Mat containing the extra data if it exists.
418+
*/
419+
virtual void getExtraData(cv::Mat& extraData) const = 0;
338420
};
339421

340422
/** @brief Creates video reader.
341423
342424
@param filename Name of the input video file.
425+
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
343426
344427
FFMPEG is used to read videos. User can implement own demultiplexing with cudacodec::RawVideoSource
345428
*/
346-
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename);
429+
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename, const bool rawMode = false);
430+
347431
/** @overload
348432
@param source RAW video source implemented by user.
433+
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
349434
*/
350-
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source);
435+
CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode = false);
351436

352437
//! @}
353438

modules/cudacodec/src/cuvid_video_source.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ cv::cudacodec::detail::CuvidVideoSource::CuvidVideoSource(const String& fname)
7676
format_.height = vidfmt.coded_height;
7777
format_.displayArea = Rect(Point(vidfmt.display_area.left, vidfmt.display_area.top), Point(vidfmt.display_area.right, vidfmt.display_area.bottom));
7878
format_.valid = true;
79+
if (vidfmt.frame_rate.numerator != 0 && vidfmt.frame_rate.denominator != 0)
80+
format_.fps = vidfmt.frame_rate.numerator / (double)vidfmt.frame_rate.denominator;
7981
}
8082

8183
cv::cudacodec::detail::CuvidVideoSource::~CuvidVideoSource()
@@ -88,10 +90,9 @@ FormatInfo cv::cudacodec::detail::CuvidVideoSource::format() const
8890
return format_;
8991
}
9092

91-
void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const int codedWidth, const int codedHeight)
93+
void cv::cudacodec::detail::CuvidVideoSource::updateFormat(const FormatInfo& videoFormat)
9294
{
93-
format_.width = codedWidth;
94-
format_.height = codedHeight;
95+
format_ = videoFormat;
9596
format_.valid = true;
9697
}
9798

@@ -119,7 +120,7 @@ int CUDAAPI cv::cudacodec::detail::CuvidVideoSource::HandleVideoData(void* userD
119120
{
120121
CuvidVideoSource* thiz = static_cast<CuvidVideoSource*>(userData);
121122

122-
return thiz->parseVideoData(packet->payload, packet->payload_size, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
123+
return thiz->parseVideoData(packet->payload, packet->payload_size, thiz->RawModeEnabled(), false, (packet->flags & CUVID_PKT_ENDOFSTREAM) != 0);
123124
}
124125

125126
#endif // HAVE_NVCUVID

modules/cudacodec/src/cuvid_video_source.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class CuvidVideoSource : public VideoSource
5555
~CuvidVideoSource();
5656

5757
FormatInfo format() const CV_OVERRIDE;
58-
void updateFormat(const int codedWidth, const int codedHeight);
58+
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
5959
void start() CV_OVERRIDE;
6060
void stop() CV_OVERRIDE;
6161
bool isStarted() const CV_OVERRIDE;

modules/cudacodec/src/ffmpeg_video_source.cpp

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Codec FourccToCodec(int codec)
7575
case CV_FOURCC_MACRO('M', 'P', 'G', '1'): return MPEG1;
7676
case CV_FOURCC_MACRO('M', 'P', 'G', '2'): return MPEG2;
7777
case CV_FOURCC_MACRO('X', 'V', 'I', 'D'): // fallthru
78+
case CV_FOURCC_MACRO('m', 'p', '4', 'v'): // fallthru
7879
case CV_FOURCC_MACRO('D', 'I', 'V', 'X'): return MPEG4;
7980
case CV_FOURCC_MACRO('W', 'V', 'C', '1'): return VC1;
8081
case CV_FOURCC_MACRO('H', '2', '6', '4'): // fallthru
@@ -112,6 +113,22 @@ void FourccToChromaFormat(const int pixelFormat, ChromaFormat &chromaFormat, int
112113
}
113114
}
114115

116+
int StartCodeLen(unsigned char* data, const int sz) {
117+
if (sz >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
118+
return 3;
119+
else if (sz >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1)
120+
return 4;
121+
else
122+
return 0;
123+
}
124+
125+
bool ParamSetsExist(unsigned char* parameterSets, const int szParameterSets, unsigned char* data, const int szData) {
126+
const int paramSetStartCodeLen = StartCodeLen(parameterSets, szParameterSets);
127+
const int packetStartCodeLen = StartCodeLen(data, szData);
128+
// weak test to see if the parameter set has already been included in the RTP stream
129+
return paramSetStartCodeLen != 0 && packetStartCodeLen != 0 && parameterSets[paramSetStartCodeLen] == data[packetStartCodeLen];
130+
}
131+
115132
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
116133
{
117134
if (!videoio_registry::hasBackend(CAP_FFMPEG))
@@ -125,6 +142,11 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
125142
CV_Error(Error::StsUnsupportedFormat, "Fetching of RAW video streams is not supported");
126143
CV_Assert(cap.get(CAP_PROP_FORMAT) == -1);
127144

145+
const int codecExtradataIndex = static_cast<int>(cap.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
146+
Mat tmpExtraData;
147+
if (cap.retrieve(tmpExtraData, codecExtradataIndex) && tmpExtraData.total())
148+
extraData = tmpExtraData.clone();
149+
128150
int codec = (int)cap.get(CAP_PROP_FOURCC);
129151
int pixelFormat = (int)cap.get(CAP_PROP_CODEC_PIXEL_FORMAT);
130152

@@ -133,6 +155,7 @@ cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname)
133155
format_.width = cap.get(CAP_PROP_FRAME_WIDTH);
134156
format_.displayArea = Rect(0, 0, format_.width, format_.height);
135157
format_.valid = false;
158+
format_.fps = cap.get(CAP_PROP_FPS);
136159
FourccToChromaFormat(pixelFormat, format_.chromaFormat, format_.nBitDepthMinus8);
137160
}
138161

@@ -147,10 +170,9 @@ FormatInfo cv::cudacodec::detail::FFmpegVideoSource::format() const
147170
return format_;
148171
}
149172

150-
void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const int codedWidth, const int codedHeight)
173+
void cv::cudacodec::detail::FFmpegVideoSource::updateFormat(const FormatInfo& videoFormat)
151174
{
152-
format_.width = codedWidth;
153-
format_.height = codedHeight;
175+
format_ = videoFormat;
154176
format_.valid = true;
155177
}
156178

@@ -159,7 +181,26 @@ bool cv::cudacodec::detail::FFmpegVideoSource::getNextPacket(unsigned char** dat
159181
cap >> rawFrame;
160182
*data = rawFrame.data;
161183
*size = rawFrame.total();
184+
if (iFrame++ == 0 && extraData.total()) {
185+
if (format_.codec == Codec::MPEG4 ||
186+
((format_.codec == Codec::H264 || format_.codec == Codec::HEVC) && !ParamSetsExist(extraData.data, extraData.total(), *data, *size)))
187+
{
188+
const size_t nBytesToTrimFromData = format_.codec == Codec::MPEG4 ? 3 : 0;
189+
const size_t newSz = extraData.total() + *size - nBytesToTrimFromData;
190+
dataWithHeader = Mat(1, newSz, CV_8UC1);
191+
memcpy(dataWithHeader.data, extraData.data, extraData.total());
192+
memcpy(dataWithHeader.data + extraData.total(), (*data) + nBytesToTrimFromData, *size - nBytesToTrimFromData);
193+
*data = dataWithHeader.data;
194+
*size = newSz;
195+
}
196+
}
197+
162198
return *size != 0;
163199
}
164200

201+
bool cv::cudacodec::detail::FFmpegVideoSource::lastPacketContainsKeyFrame() const
202+
{
203+
return cap.get(CAP_PROP_LRF_HAS_KEY_FRAME);
204+
}
205+
165206
#endif // HAVE_CUDA

modules/cudacodec/src/ffmpeg_video_source.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,19 @@ class FFmpegVideoSource : public RawVideoSource
5656

5757
bool getNextPacket(unsigned char** data, size_t* size) CV_OVERRIDE;
5858

59+
bool lastPacketContainsKeyFrame() const;
60+
5961
FormatInfo format() const CV_OVERRIDE;
6062

61-
void updateFormat(const int codedWidth, const int codedHeight);
63+
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
6264

65+
void getExtraData(cv::Mat& _extraData) const CV_OVERRIDE { _extraData = extraData; }
6366

6467
private:
6568
FormatInfo format_;
6669
VideoCapture cap;
67-
Mat rawFrame;
70+
Mat rawFrame, extraData, dataWithHeader;
71+
int iFrame = 0;
6872
};
6973

7074
}}}

modules/cudacodec/src/frame_queue.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,22 @@
4545

4646
#ifdef HAVE_NVCUVID
4747

48-
cv::cudacodec::detail::FrameQueue::FrameQueue() :
49-
endOfDecode_(0),
50-
framesInQueue_(0),
51-
readPosition_(0)
52-
{
53-
std::memset(displayQueue_, 0, sizeof(displayQueue_));
54-
std::memset((void*) isFrameInUse_, 0, sizeof(isFrameInUse_));
48+
RawPacket::RawPacket(const unsigned char* _data, const size_t _size, const bool _containsKeyFrame) : size(_size), containsKeyFrame(_containsKeyFrame) {
49+
data = cv::makePtr<unsigned char*>(new unsigned char[size]);
50+
memcpy(*data, _data, size);
51+
};
52+
53+
cv::cudacodec::detail::FrameQueue::~FrameQueue() {
54+
if (isFrameInUse_)
55+
delete[] isFrameInUse_;
56+
}
57+
58+
void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) {
59+
AutoLock autoLock(mtx_);
60+
maxSz = _maxSz;
61+
displayQueue_ = std::vector<CUVIDPARSERDISPINFO>(maxSz, CUVIDPARSERDISPINFO());
62+
isFrameInUse_ = new volatile int[maxSz];
63+
std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz);
5564
}
5665

5766
bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
@@ -68,7 +77,7 @@ bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex
6877
return true;
6978
}
7079

71-
void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams)
80+
void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket> rawPackets)
7281
{
7382
// Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed
7483
// for display
@@ -82,10 +91,12 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
8291
{
8392
AutoLock autoLock(mtx_);
8493

85-
if (framesInQueue_ < MaximumSize)
94+
if (framesInQueue_ < maxSz)
8695
{
87-
int writePosition = (readPosition_ + framesInQueue_) % MaximumSize;
88-
displayQueue_[writePosition] = *picParams;
96+
const int writePosition = (readPosition_ + framesInQueue_) % maxSz;
97+
displayQueue_.at(writePosition) = *picParams;
98+
for (const auto& rawPacket : rawPackets)
99+
rawPacketQueue.push(rawPacket);
89100
framesInQueue_++;
90101
isFramePlaced = true;
91102
}
@@ -99,15 +110,19 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
99110
} while (!isEndOfDecode());
100111
}
101112

102-
bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo)
113+
bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets)
103114
{
104115
AutoLock autoLock(mtx_);
105116

106117
if (framesInQueue_ > 0)
107118
{
108119
int entry = readPosition_;
109-
displayInfo = displayQueue_[entry];
110-
readPosition_ = (entry + 1) % MaximumSize;
120+
displayInfo = displayQueue_.at(entry);
121+
while (!rawPacketQueue.empty()) {
122+
rawPackets.push_back(rawPacketQueue.front());
123+
rawPacketQueue.pop();
124+
}
125+
readPosition_ = (entry + 1) % maxSz;
111126
framesInQueue_--;
112127
return true;
113128
}

0 commit comments

Comments
 (0)