diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
index 45f6ba1..afe0b30 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -241,7 +241,7 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
     // What happens to those decode surfaces that haven't yet been mapped is
     // unclear.
     flush();
-    unmapPreviousFrame();
+    // unmapPreviousFrame();
     NVDECCache::getCache(device_).returnDecoder(
         &videoFormat_, std::move(decoder_));
   }
@@ -560,7 +560,7 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
   // color-converted (with a copy), or that's a frame that was discarded in
   // SingleStreamDecoder. Either way, the underlying output surface can be
   // safely re-used.
-  unmapPreviousFrame();
+  // unmapPreviousFrame();
   CUresult result = cuvidMapVideoFrame(
       *decoder_.get(), dispInfo.picture_index, &framePtr, &pitch, &procParams);
   if (result != CUDA_SUCCESS) {
@@ -569,13 +569,14 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
 
   previouslyMappedFrame_ = framePtr;
   avFrame = convertCudaFrameToAVFrame(framePtr, pitch, dispInfo);
+  unmapPreviousFrame();
   return AVSUCCESS;
 }
 
 void BetaCudaDeviceInterface::unmapPreviousFrame() {
-  if (previouslyMappedFrame_ == 0) {
-    return;
-  }
+  // if (previouslyMappedFrame_ == 0) {
+  //   return;
+  // }
   CUresult result =
       cuvidUnmapVideoFrame(*decoder_.get(), previouslyMappedFrame_);
   TORCH_CHECK(
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index c8870df..2a5915c 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -409,6 +409,19 @@ torch::Tensor SingleStreamDecoder::getKeyFrameIndices() {
   return keyFrameIndices;
 }
 
+// Returns the codec family name ("h264", "hevc", "av1" or "vp9") that occurs
+// as a substring of `input`, or nullptr if none does. `input` must be a
+// non-null C string. The result points to a string literal: do not free it.
+inline const char* find_codec(const char* input) {
+  static constexpr const char* kCodecs[] = {"h264", "hevc", "av1", "vp9"};
+  for (const char* family : kCodecs) {
+    if (std::strstr(input, family) != nullptr) {
+      return family;
+    }
+  }
+  return nullptr;
+}
+
 // --------------------------------------------------------------------------
 // ADDING STREAMS API
 // --------------------------------------------------------------------------
@@ -461,9 +474,22 @@ void SingleStreamDecoder::addStream(
   // TODO_CODE_QUALITY it's pretty meh to have a video-specific logic within
   // addStream() which is supposed to be generic
   if (mediaType == AVMEDIA_TYPE_VIDEO) {
-    avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
-        deviceInterface_->findCodec(streamInfo.stream->codecpar->codec_id)
-            .value_or(avCodec));
+    if (device.type() != torch::kCUDA) {
+      avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
+          deviceInterface_->findCodec(streamInfo.stream->codecpar->codec_id)
+              .value_or(avCodec));
+    } else {
+      // On CUDA, look up FFmpeg's "<family>_cuvid" decoder by name.
+      const char* codecFamily = find_codec(avCodec->name);
+      TORCH_CHECK(
+          codecFamily != nullptr,
+          "No _cuvid decoder is known for codec ",
+          avCodec->name);
+      const std::string cuvidName = std::string(codecFamily) + "_cuvid";
+      avCodec = avcodec_find_decoder_by_name(cuvidName.c_str());
+      TORCH_CHECK(
+          avCodec != nullptr, "FFmpeg decoder not found: ", cuvidName);
+    }
   }
 
   AVCodecContext* codecContext = avcodec_alloc_context3(avCodec);