From 4aeff36c9d79c046e726bcc18a311754ace5c47e Mon Sep 17 00:00:00 2001 From: Roman Arzumanyan Date: Thu, 1 Jun 2023 11:17:39 +0300 Subject: [PATCH] libavcodec/cuviddec: determine amount of decoded surfaces from within cuvid parser --- libavcodec/cuviddec.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 3d43bbd466..ef25c1d470 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -115,6 +115,12 @@ typedef struct CuvidParsedFrame #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) +// NV recommends [2;4] range +#define CUVID_MAX_DISPLAY_DELAY (4) + +// Actual pool size will be determined by parser. +#define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1) + static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { AVCodecContext *avctx = opaque; @@ -124,6 +130,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form CUVIDDECODECREATEINFO cuinfo; int surface_fmt; int chroma_444; + int fifo_size_inc; int old_width = avctx->width; int old_height = avctx->height; @@ -309,6 +316,25 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return 0; } + fifo_size_inc = ctx->nb_surfaces; + ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, format->min_num_decode_surfaces + 3); + + if (0 < avctx->extra_hw_frames) + ctx->nb_surfaces += avctx->extra_hw_frames; + + fifo_size_inc = ctx->nb_surfaces - fifo_size_inc; + if (0 < fifo_size_inc && 0 > av_fifo_grow2(ctx->frame_queue, fifo_size_inc)) { + av_log(avctx, AV_LOG_ERROR, "Failed to grow frame queue on video sequence callback\n"); + ctx->internal_error = AVERROR(ENOMEM); + return 0; + } + + if (0 < fifo_size_inc && 0 > av_reallocp_array(&ctx->key_frame, ctx->nb_surfaces, sizeof(int))) { + av_log(avctx, AV_LOG_ERROR, "Failed to grow key frame array on video sequence callback\n"); + ctx->internal_error = AVERROR(ENOMEM); + return 0; + } + cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; @@ -846,6 +872,10 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) goto error; } + // respect the deprecated "surfaces" option if non-default value is given by user; + if(25 == ctx->nb_surfaces) + ctx->nb_surfaces = CUVID_DEFAULT_NUM_SURFACES; + ctx->frame_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(CuvidParsedFrame), 0); if (!ctx->frame_queue) { ret = AVERROR(ENOMEM); @@ -993,7 +1023,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) } ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces; - ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4; + ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : CUVID_MAX_DISPLAY_DELAY; ctx->cuparseinfo.pUserData = avctx; ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence; ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode; @@ -1097,7 +1127,7 @@ static const AVOption options[] = { { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" }, { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" }, { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, - { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD }, + { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD | AV_OPT_FLAG_DEPRECATED }, { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD }, { "crop", "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, { "resize", "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, -- 2.34.1