Re: [FFmpeg-devel] [PATCH] avcodec/cuviddec: update amount of decoder surfaces from within sequence decode callback

From: Roman Arzumanyan <r.arzumanyan@visionlabs.ai>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/cuviddec: update amount of decoder surfaces from within sequence decode callback
Date: Mon, 5 Jun 2023 15:29:23 +0300
Message-ID: <CAJZRZVkhtGWg26FUw_BUDO+y_30CVPasQhRhjJU=fr1LR8RwFA@mail.gmail.com> (raw)
In-Reply-To: <168596036850.3843.16759199081478040168@lain.khirnov.net>

[-- Attachment #1: Type: text/plain, Size: 3802 bytes --]

Thanks for the review, Anton. Please find the updated patch attached.
BTW - what do you mean by "you should also forward the actual error code"?
Within the cuvid_handle_video_sequence() function in case of error,
function still returns 0 and sets internal context error, so I kept
behavior intact. Do I miss something?

пн, 5 июн. 2023 г. в 13:19, Anton Khirnov <anton@khirnov.net>:

> Quoting Roman Arzumanyan (2023-06-05 09:30:07)
> > Hello,
> >
> > This patch reduces vRAM usage by cuvid decoder implementation.
> > The number of surfaces used for decoding is updated within the parser
> > sequence decode callback.
> > Also the "surfaces" AVDictionary option specific to cuvid was removed in
> > favor of "extra_hw_surfaces".
>
> This can break existing workflows, you should deprecated the option
> instead and only remove it after some time has passed.
>
> >
> > vRAM consumption was tested on various videos and savings are between 1%
> > for 360p resolution up to 21% for some 1080p H.264 videos.
> > Decoding performance was tested on various H.264 and H.265 videos in
> > different resolutions from 360p and higher, no performance penalty was
> > found.
> >
> > From 32a1b016e88fa40b983318d4583750ef250a78d9 Mon Sep 17 00:00:00 2001
> > From: Roman Arzumanyan <r.arzumanyan@visionlabs.ai>
> > Date: Thu, 1 Jun 2023 11:17:39 +0300
> > Subject: [PATCH] libavcodec/cuviddec: determine DPB size from within
> cuvid
> >  parser
> >
> > ---
> >  libavcodec/cuviddec.c | 29 +++++++++++++++++++++++++++--
> >  1 file changed, 27 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> > index 3d43bbd466..759ed49870 100644
> > --- a/libavcodec/cuviddec.c
> > +++ b/libavcodec/cuviddec.c
> > @@ -115,6 +115,12 @@ typedef struct CuvidParsedFrame
> >
> >  #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
> >
> > +// NV recommends [2;4] range
> > +#define CUVID_MAX_DISPLAY_DELAY (4)
> > +
> > +// Actual DPB size will be determined by parser.
> > +#define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1)
> > +
> >  static int CUDAAPI cuvid_handle_video_sequence(void *opaque,
> CUVIDEOFORMAT* format)
> >  {
> >      AVCodecContext *avctx = opaque;
> > @@ -309,6 +315,25 @@ static int CUDAAPI cuvid_handle_video_sequence(void
> *opaque, CUVIDEOFORMAT* form
> >          return 0;
> >      }
> >
> > +    if (ctx->nb_surfaces < format->min_num_decode_surfaces + 3)
> > +        ctx->nb_surfaces = format->min_num_decode_surfaces + 3;
>
> FFMAX()
>
> > +
> > +    if (avctx->extra_hw_frames > 0)
> > +        ctx->nb_surfaces += avctx->extra_hw_frames;
> > +
> > +    if (0 > av_fifo_realloc2(ctx->frame_queue, ctx->nb_surfaces *
> sizeof(CuvidParsedFrame))) {
>
> this is the old deprecated AVFifoBuffer API, you cannot use it with
> AVFifo objects
>
> you should also forward the actual error code
>
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on
> video sequence callback\n");
> > +        ctx->internal_error = AVERROR(EINVAL);
> > +        return 0;
> > +    }
> > +
> > +    ctx->key_frame = av_realloc_array(ctx->key_frame, ctx->nb_surfaces,
> sizeof(int));
> > +    if (!ctx->key_frame) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to recreate key frame queue
> on video sequence callback\n");
> > +        ctx->internal_error = AVERROR(EINVAL);
>
> Leaks key_frame on failure and should be ENOMEM.
>
> --
> Anton Khirnov
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: 0001-libavcodec-cuviddec-determine-amount-of-decoded-surf.patch --]
[-- Type: text/x-patch, Size: 4469 bytes --]

From 4aeff36c9d79c046e726bcc18a311754ace5c47e Mon Sep 17 00:00:00 2001
From: Roman Arzumanyan <r.arzumanyan@visionlabs.ai>
Date: Thu, 1 Jun 2023 11:17:39 +0300
Subject: [PATCH] libavcodec/cuviddec: determine amount of decoded surfaces
 from within cuvid parser

---
 libavcodec/cuviddec.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 3d43bbd466..ef25c1d470 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -115,6 +115,12 @@ typedef struct CuvidParsedFrame
 
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 
+// NV recommends [2;4] range
+#define CUVID_MAX_DISPLAY_DELAY (4)
+
+// Actual pool size will be determined by parser.
+#define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1)
+
 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 {
     AVCodecContext *avctx = opaque;
@@ -124,6 +130,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
     int chroma_444;
+    int fifo_size_inc;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -309,6 +316,25 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
+    fifo_size_inc = ctx->nb_surfaces;
+    ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, format->min_num_decode_surfaces + 3);
+
+    if (0 < avctx->extra_hw_frames)
+        ctx->nb_surfaces += avctx->extra_hw_frames;
+
+    fifo_size_inc = ctx->nb_surfaces - fifo_size_inc;
+    if (0 < fifo_size_inc && 0 > av_fifo_grow2(ctx->frame_queue, fifo_size_inc)) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to grow frame queue on video sequence callback\n");
+        ctx->internal_error = AVERROR(ENOMEM);
+        return 0;
+    }
+
+    if (0 < fifo_size_inc && 0 > av_reallocp_array(&ctx->key_frame, ctx->nb_surfaces, sizeof(int))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to grow key frame array on video sequence callback\n");
+        ctx->internal_error = AVERROR(ENOMEM);
+        return 0;
+    }
+
     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
     cuinfo.ulNumOutputSurfaces = 1;
     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
@@ -846,6 +872,10 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         goto error;
     }
 
+    // respect the deprecated "surfaces" option if non-default value is given by user;
+    if(25 == ctx->nb_surfaces)
+        ctx->nb_surfaces = CUVID_DEFAULT_NUM_SURFACES;
+
     ctx->frame_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(CuvidParsedFrame), 0);
     if (!ctx->frame_queue) {
         ret = AVERROR(ENOMEM);
@@ -993,7 +1023,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     }
 
     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
-    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4;
+    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : CUVID_MAX_DISPLAY_DELAY;
     ctx->cuparseinfo.pUserData = avctx;
     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
@@ -1097,7 +1127,7 @@ static const AVOption options[] = {
     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
-    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
+    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD | AV_OPT_FLAG_DEPRECATED },
     { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { "crop",     "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
     { "resize",   "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
-- 
2.34.1


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".