diff --git a/torchvision/csrc/io/decoder/decoder.cpp b/torchvision/csrc/io/decoder/decoder.cpp index 9ff2bd75bb5..845a0cc80ad 100644 --- a/torchvision/csrc/io/decoder/decoder.cpp +++ b/torchvision/csrc/io/decoder/decoder.cpp @@ -18,25 +18,6 @@ constexpr size_t kIoBufferSize = 96 * 1024; constexpr size_t kIoPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE; constexpr size_t kLogBufferSize = 1024; -int ffmpeg_lock(void** mutex, enum AVLockOp op) { - std::mutex** handle = (std::mutex**)mutex; - switch (op) { - case AV_LOCK_CREATE: - *handle = new std::mutex(); - break; - case AV_LOCK_OBTAIN: - (*handle)->lock(); - break; - case AV_LOCK_RELEASE: - (*handle)->unlock(); - break; - case AV_LOCK_DESTROY: - delete *handle; - break; - } - return 0; -} - bool mapFfmpegType(AVMediaType media, MediaType* type) { switch (media) { case AVMEDIA_TYPE_AUDIO: @@ -202,8 +183,6 @@ void Decoder::initOnce() { avcodec_register_all(); #endif avformat_network_init(); - // register ffmpeg lock manager - av_lockmgr_register(&ffmpeg_lock); av_log_set_callback(Decoder::logFunction); av_log_set_level(AV_LOG_ERROR); VLOG(1) << "Registered ffmpeg libs"; @@ -277,7 +256,7 @@ bool Decoder::init( break; } - fmt = av_find_input_format(fmtName); + fmt = (AVInputFormat*)av_find_input_format(fmtName); } const size_t avioCtxBufferSize = kIoBufferSize; @@ -495,8 +474,8 @@ void Decoder::cleanUp() { // function does actual work, derived class calls it in working thread // periodically. On success method returns 0, ENODATA on EOF, ETIMEDOUT if -// no frames got decoded in the specified timeout time, and error on -// unrecoverable error. +// no frames got decoded in the specified timeout time, AVERROR_BUFFER_TOO_SMALL +// when unable to allocate packet and error on unrecoverable error int Decoder::getFrame(size_t workingTimeInMs) { if (inRange_.none()) { return ENODATA; @@ -505,10 +484,15 @@ int Decoder::getFrame(size_t workingTimeInMs) { // once decode() method gets called and grab some bytes // run this method again // init package - AVPacket avPacket; - av_init_packet(&avPacket); - avPacket.data = nullptr; - avPacket.size = 0; + // update 03/22: moving memory management to ffmpeg + AVPacket* avPacket; + avPacket = av_packet_alloc(); + if (avPacket == nullptr) { + LOG(ERROR) << "decoder as not able to allocate the packet."; + return AVERROR_BUFFER_TOO_SMALL; + } + avPacket->data = nullptr; + avPacket->size = 0; auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(workingTimeInMs); @@ -520,8 +504,12 @@ int Decoder::getFrame(size_t workingTimeInMs) { int result = 0; size_t decodingErrors = 0; bool decodedFrame = false; - while (!interrupted_ && inRange_.any() && !decodedFrame && watcher()) { - result = av_read_frame(inputCtx_, &avPacket); + while (!interrupted_ && inRange_.any() && !decodedFrame) { + if (watcher() == false) { + result = ETIMEDOUT; + break; + } + result = av_read_frame(inputCtx_, avPacket); if (result == AVERROR(EAGAIN)) { VLOG(4) << "Decoder is busy..."; std::this_thread::yield(); @@ -538,10 +526,11 @@ int Decoder::getFrame(size_t workingTimeInMs) { break; } - // get stream - auto stream = findByIndex(avPacket.stream_index); + // get stream; if stream cannot be found reset the packet to + // default settings + auto stream = findByIndex(avPacket->stream_index); if (stream == nullptr || !inRange_.test(stream->getIndex())) { - av_packet_unref(&avPacket); + av_packet_unref(avPacket); continue; } @@ -553,7 +542,7 @@ int Decoder::getFrame(size_t workingTimeInMs) { bool hasMsg = false; // packet either got consumed completely or not at all if ((result = processPacket( - stream, &avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) { + stream, avPacket, &gotFrame, &hasMsg, params_.fastSeek)) < 0) { LOG(ERROR) << "processPacket failed with code: " << result; break; } @@ -585,11 +574,10 @@ int Decoder::getFrame(size_t workingTimeInMs) { result = 0; - av_packet_unref(&avPacket); + av_packet_unref(avPacket); } - av_packet_unref(&avPacket); - + av_packet_free(&avPacket); VLOG(2) << "Interrupted loop" << ", interrupted_ " << interrupted_ << ", inRange_.any() " << inRange_.any() << ", decodedFrame " << decodedFrame << ", result " @@ -597,8 +585,7 @@ int Decoder::getFrame(size_t workingTimeInMs) { // loop can be terminated, either by: // 1. explcitly iterrupted - // 2. terminated by workable timeout - // 3. unrecoverable error or ENODATA (end of stream) + // 3. unrecoverable error or ENODATA (end of stream) or ETIMEDOUT (timeout) // 4. decoded frames pts are out of the specified range // 5. success decoded frame if (interrupted_) { diff --git a/torchvision/csrc/io/decoder/stream.cpp b/torchvision/csrc/io/decoder/stream.cpp index 3fd49efbfc7..0d625ef211c 100644 --- a/torchvision/csrc/io/decoder/stream.cpp +++ b/torchvision/csrc/io/decoder/stream.cpp @@ -28,7 +28,7 @@ Stream::~Stream() { // look up the proper CODEC querying the function AVCodec* Stream::findCodec(AVCodecParameters* params) { - return avcodec_find_decoder(params->codec_id); + return (AVCodec*)avcodec_find_decoder(params->codec_id); } // Allocate memory for the AVCodecContext, which will hold the context for diff --git a/torchvision/csrc/io/decoder/subtitle_stream.cpp b/torchvision/csrc/io/decoder/subtitle_stream.cpp index 0d3fc9f12c1..27c61d4dbd9 100644 --- a/torchvision/csrc/io/decoder/subtitle_stream.cpp +++ b/torchvision/csrc/io/decoder/subtitle_stream.cpp @@ -43,21 +43,34 @@ int SubtitleStream::initFormat() { int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) { // clean-up releaseSubtitle(); + + // FIXME: should this even be created? + AVPacket* avPacket; + avPacket = av_packet_alloc(); + if (avPacket == nullptr) { + LOG(ERROR) + << "decoder as not able to allocate the subtitle-specific packet."; + // alternative to ENOMEM + return AVERROR_BUFFER_TOO_SMALL; + } + avPacket->data = nullptr; + avPacket->size = 0; // check flush packet - AVPacket avPacket; - av_init_packet(&avPacket); - avPacket.data = nullptr; - avPacket.size = 0; - auto pkt = packet ? *packet : avPacket; + auto pkt = packet ? packet : avPacket; + int gotFramePtr = 0; - int result = avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, &pkt); + // is these a better way than cast from const? + int result = + avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, (AVPacket*)pkt); if (result < 0) { LOG(ERROR) << "avcodec_decode_subtitle2 failed, err: " << Util::generateErrorDesc(result); + // free the packet we've created + av_packet_free(&avPacket); return result; } else if (result == 0) { - result = pkt.size; // discard the rest of the package + result = pkt->size; // discard the rest of the package } sub_.release = gotFramePtr; @@ -66,9 +79,10 @@ int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) { // set proper pts in us if (gotFramePtr) { sub_.pts = av_rescale_q( - pkt.pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ); + pkt->pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ); } + av_packet_free(&avPacket); return result; }