Re: [FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* Re: [FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
       [not found]   ` <DM4PR11MB52946A90C16BC74C6A6DF492F88A9@DM4PR11MB5294.namprd11.prod.outlook.com>
@ 2021-12-24 23:22     ` Ed Martin
  2021-12-25  5:49       ` Dennis Mungai
  0 siblings, 1 reply; 6+ messages in thread
From: Ed Martin @ 2021-12-24 23:22 UTC (permalink / raw)
  To: ffmpeg-devel

On 10/31/21 22:14, Chen, Wenbin wrote:
>> Add async_depth to increase the encoder's performance. Reuse encode_fifo as
>> the async buffer. The encoder sends all reordered frames to HW and then
>> checks the fifo size. If fifo size < async_depth and the top frame is not
>> ready, it will return AVERROR(EAGAIN) to request more frames.
>>
>> 1080p transcoding (no B frames) with -async_depth=4 can increase 20%
>> performance on my environment.
>> The async increases performance but also introduces frame delay.
>>
>> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
>> ---
>>   libavcodec/vaapi_encode.c | 20 +++++++++++++++-----
>>   libavcodec/vaapi_encode.h | 12 ++++++++++--
>>   2 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>> index db0ae136a1..616fb7c089 100644
>> --- a/libavcodec/vaapi_encode.c
>> +++ b/libavcodec/vaapi_encode.c
>> @@ -1158,7 +1158,8 @@ static int
>> vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
>>           if (ctx->input_order == ctx->decode_delay)
>>               ctx->dts_pts_diff = pic->pts - ctx->first_pts;
>>           if (ctx->output_delay > 0)
>> -            ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
>> +            ctx->ts_ring[ctx->input_order %
>> +                        (3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
>>
>>           pic->display_order = ctx->input_order;
>>           ++ctx->input_order;
>> @@ -1212,7 +1213,8 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>>               return AVERROR(EAGAIN);
>>       }
>>
>> -    while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES *
>> sizeof(VAAPIEncodePicture *)) {
>> +    while (av_fifo_size(ctx->encode_fifo) <
>> +            MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) {
>>           pic = NULL;
>>           err = vaapi_encode_pick_next(avctx, &pic);
>>           if (err < 0)
>> @@ -1234,6 +1236,14 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>>       if (!av_fifo_size(ctx->encode_fifo))
>>           return err;
>>
>> +    if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth *
>> sizeof(VAAPIEncodePicture *) &&
>> +        !ctx->end_of_stream) {
>> +        av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>> +        err = vaapi_encode_wait(avctx, pic, 0);
>> +        if (err < 0)
>> +            return err;
>> +    }
>> +
>>       av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>>       ctx->encode_order = pic->encode_order + 1;
>>
>> @@ -1252,7 +1262,7 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>>               pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
>>       } else {
>>           pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
>> -                                (3 * ctx->output_delay)];
>> +                                (3 * ctx->output_delay + ctx->async_depth)];
>>       }
>>       av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64"
>> dts %"PRId64".\n",
>>              pkt->pts, pkt->dts);
>> @@ -2566,8 +2576,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext
>> *avctx)
>>           }
>>       }
>>
>> -    ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
>> -                                      sizeof(VAAPIEncodePicture *));
>> +    ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH *
>> +                                     sizeof(VAAPIEncodePicture *));
>>       if (!ctx->encode_fifo)
>>           return AVERROR(ENOMEM);
>>
>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>> index 89fe8de466..1bf5d7c337 100644
>> --- a/libavcodec/vaapi_encode.h
>> +++ b/libavcodec/vaapi_encode.h
>> @@ -48,6 +48,7 @@ enum {
>>       MAX_TILE_ROWS          = 22,
>>       // A.4.1: table A.6 allows at most 20 tile columns for any level.
>>       MAX_TILE_COLS          = 20,
>> +    MAX_ASYNC_DEPTH        = 64,
>>   };
>>
>>   extern const AVCodecHWConfigInternal *const
>> ff_vaapi_encode_hw_configs[];
>> @@ -298,7 +299,8 @@ typedef struct VAAPIEncodeContext {
>>       // Timestamp handling.
>>       int64_t         first_pts;
>>       int64_t         dts_pts_diff;
>> -    int64_t         ts_ring[MAX_REORDER_DELAY * 3];
>> +    int64_t         ts_ring[MAX_REORDER_DELAY * 3 +
>> +                            MAX_ASYNC_DEPTH];
>>
>>       // Slice structure.
>>       int slice_block_rows;
>> @@ -348,6 +350,8 @@ typedef struct VAAPIEncodeContext {
>>       AVFrame         *frame;
>>
>>       AVFifoBuffer *encode_fifo;
>> +
>> +    int async_depth;
>>   } VAAPIEncodeContext;
>>
>>   enum {
>> @@ -458,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
>>       { "b_depth", \
>>         "Maximum B-frame reference depth", \
>>         OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
>> -      { .i64 = 1 }, 1, INT_MAX, FLAGS }
>> +      { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
>> +    { "async_depth", "Maximum processing parallelism. " \
>> +      "Increase this to improve single channel performance", \
>> +      OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
>> +      { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS }
>>
>>   #define VAAPI_ENCODE_RC_MODE(name, desc) \
>>       { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
>> --
>> 2.25.1
> ping

I tested this patchset and I can confirm that it solves my bug that I
thought was a mesa bug
(https://gitlab.freedesktop.org/mesa/mesa/-/issues/1235).


I would love it if this feature were incorporated into ffmpeg.

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
  2021-12-24 23:22     ` [FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance Ed Martin
@ 2021-12-25  5:49       ` Dennis Mungai
  0 siblings, 0 replies; 6+ messages in thread
From: Dennis Mungai @ 2021-12-25  5:49 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Sat, 25 Dec 2021, 02:23 Ed Martin, <lists@edman007.com> wrote:

> On 10/31/21 22:14, Chen, Wenbin wrote:
> >> Add async_depth to increase the encoder's performance. Reuse encode_fifo as
> >> the async buffer. The encoder sends all reordered frames to HW and then
> >> checks the fifo size. If fifo size < async_depth and the top frame is not
> >> ready, it will return AVERROR(EAGAIN) to request more frames.
> >>
> >> 1080p transcoding (no B frames) with -async_depth=4 can increase 20%
> >> performance on my environment.
> >> The async increases performance but also introduces frame delay.
> >>
> >> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> >> ---
> >>   libavcodec/vaapi_encode.c | 20 +++++++++++++++-----
> >>   libavcodec/vaapi_encode.h | 12 ++++++++++--
> >>   2 files changed, 25 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> >> index db0ae136a1..616fb7c089 100644
> >> --- a/libavcodec/vaapi_encode.c
> >> +++ b/libavcodec/vaapi_encode.c
> >> @@ -1158,7 +1158,8 @@ static int
> >> vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
> >>           if (ctx->input_order == ctx->decode_delay)
> >>               ctx->dts_pts_diff = pic->pts - ctx->first_pts;
> >>           if (ctx->output_delay > 0)
> >> -            ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] =
> pic->pts;
> >> +            ctx->ts_ring[ctx->input_order %
> >> +                        (3 * ctx->output_delay + ctx->async_depth)] =
> pic->pts;
> >>
> >>           pic->display_order = ctx->input_order;
> >>           ++ctx->input_order;
> >> @@ -1212,7 +1213,8 @@ int
> >> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >>               return AVERROR(EAGAIN);
> >>       }
> >>
> >> -    while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES *
> >> sizeof(VAAPIEncodePicture *)) {
> >> +    while (av_fifo_size(ctx->encode_fifo) <
> >> +            MAX_ASYNC_DEPTH * sizeof(VAAPIEncodePicture *)) {
> >>           pic = NULL;
> >>           err = vaapi_encode_pick_next(avctx, &pic);
> >>           if (err < 0)
> >> @@ -1234,6 +1236,14 @@ int
> >> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >>       if (!av_fifo_size(ctx->encode_fifo))
> >>           return err;
> >>
> >> +    if (av_fifo_size(ctx->encode_fifo) < ctx->async_depth *
> >> sizeof(VAAPIEncodePicture *) &&
> >> +        !ctx->end_of_stream) {
> >> +        av_fifo_generic_peek(ctx->encode_fifo, &pic, sizeof(pic),
> NULL);
> >> +        err = vaapi_encode_wait(avctx, pic, 0);
> >> +        if (err < 0)
> >> +            return err;
> >> +    }
> >> +
> >>       av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> >>       ctx->encode_order = pic->encode_order + 1;
> >>
> >> @@ -1252,7 +1262,7 @@ int
> >> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >>               pkt->dts = ctx->ts_ring[pic->encode_order] -
> ctx->dts_pts_diff;
> >>       } else {
> >>           pkt->dts = ctx->ts_ring[(pic->encode_order -
> ctx->decode_delay) %
> >> -                                (3 * ctx->output_delay)];
> >> +                                (3 * ctx->output_delay +
> ctx->async_depth)];
> >>       }
> >>       av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64"
> >> dts %"PRId64".\n",
> >>              pkt->pts, pkt->dts);
> >> @@ -2566,8 +2576,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext
> >> *avctx)
> >>           }
> >>       }
> >>
> >> -    ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> >> -                                      sizeof(VAAPIEncodePicture *));
> >> +    ctx->encode_fifo = av_fifo_alloc(MAX_ASYNC_DEPTH *
> >> +                                     sizeof(VAAPIEncodePicture *));
> >>       if (!ctx->encode_fifo)
> >>           return AVERROR(ENOMEM);
> >>
> >> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> >> index 89fe8de466..1bf5d7c337 100644
> >> --- a/libavcodec/vaapi_encode.h
> >> +++ b/libavcodec/vaapi_encode.h
> >> @@ -48,6 +48,7 @@ enum {
> >>       MAX_TILE_ROWS          = 22,
> >>       // A.4.1: table A.6 allows at most 20 tile columns for any level.
> >>       MAX_TILE_COLS          = 20,
> >> +    MAX_ASYNC_DEPTH        = 64,
> >>   };
> >>
> >>   extern const AVCodecHWConfigInternal *const
> >> ff_vaapi_encode_hw_configs[];
> >> @@ -298,7 +299,8 @@ typedef struct VAAPIEncodeContext {
> >>       // Timestamp handling.
> >>       int64_t         first_pts;
> >>       int64_t         dts_pts_diff;
> >> -    int64_t         ts_ring[MAX_REORDER_DELAY * 3];
> >> +    int64_t         ts_ring[MAX_REORDER_DELAY * 3 +
> >> +                            MAX_ASYNC_DEPTH];
> >>
> >>       // Slice structure.
> >>       int slice_block_rows;
> >> @@ -348,6 +350,8 @@ typedef struct VAAPIEncodeContext {
> >>       AVFrame         *frame;
> >>
> >>       AVFifoBuffer *encode_fifo;
> >> +
> >> +    int async_depth;
> >>   } VAAPIEncodeContext;
> >>
> >>   enum {
> >> @@ -458,7 +462,11 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
> >>       { "b_depth", \
> >>         "Maximum B-frame reference depth", \
> >>         OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
> >> -      { .i64 = 1 }, 1, INT_MAX, FLAGS }
> >> +      { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
> >> +    { "async_depth", "Maximum processing parallelism. " \
> >> +      "Increase this to improve single channel performance", \
> >> +      OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
> >> +      { .i64 = 4 }, 0, MAX_ASYNC_DEPTH, FLAGS }
> >>
> >>   #define VAAPI_ENCODE_RC_MODE(name, desc) \
> >>       { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name },
> \
> >> --
> >> 2.25.1
> > ping
>
> I tested this patchset and  I can confirm that it solves my bug that I
> thought was a mesa bug
> (https://gitlab.freedesktop.org/mesa/mesa/-/issues/1235)
>
>
> I would love if this feature is incorporated into ffmpeg


>
>
Indeed, this is the only patch that makes AMD GPUs usable with VAAPI.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async to increase performance
       [not found] <20211027085705.4114165-1-wenbin.chen@intel.com>
       [not found] ` <20211027085705.4114165-3-wenbin.chen@intel.com>
@ 2021-12-27  7:22 ` Xiang, Haihao
  2021-12-27  7:41   ` Chen, Wenbin
  2021-12-27 18:31 ` Mark Thompson
  2 siblings, 1 reply; 6+ messages in thread
From: Xiang, Haihao @ 2021-12-27  7:22 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Chen, Wenbin

On Wed, 2021-10-27 at 16:57 +0800, Wenbin Chen wrote:
> Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> decreased. The reason is that vaRenderPicture() and vaSyncSurface() are
> called at the same time (vaRenderPicture() is always followed by a
> vaSyncSurface()). When we encode a stream with B frames, we need a buffer to
> reorder frames, so we can send several frames to HW at once to increase
> performance. Now I changed them to be called in an
> asynchronous way, which will make better use of hardware.
> 1080p transcoding increases about 17% fps on my environment.

Could you provide your command? I'd like to give it a try.

Thanks
Haihao

> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>  libavcodec/vaapi_encode.c | 41 ++++++++++++++++++++++++++++-----------
>  libavcodec/vaapi_encode.h |  3 +++
>  2 files changed, 33 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index ec054ae701..5927849233 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -951,8 +951,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>      if (!pic && ctx->end_of_stream) {
>          --b_counter;
>          pic = ctx->pic_end;
> -        if (pic->encode_issued)
> +        if (pic->encode_complete)
>              return AVERROR_EOF;
> +        else if (pic->encode_issued)
> +            return AVERROR(EAGAIN);
>      }
>  
>      if (!pic) {
> @@ -1177,20 +1179,31 @@ int ff_vaapi_encode_receive_packet(AVCodecContext
> *avctx, AVPacket *pkt)
>              return AVERROR(EAGAIN);
>      }
>  
> -    pic = NULL;
> -    err = vaapi_encode_pick_next(avctx, &pic);
> -    if (err < 0)
> -        return err;
> -    av_assert0(pic);
> +    while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES *
> sizeof(VAAPIEncodePicture *)) {
> +        pic = NULL;
> +        err = vaapi_encode_pick_next(avctx, &pic);
> +        if (err < 0)
> +            break;
> +        av_assert0(pic);
>  
> -    pic->encode_order = ctx->encode_order++;
> +        pic->encode_order = ctx->encode_order +
> +                            (av_fifo_size(ctx->encode_fifo) /
> sizeof(VAAPIEncodePicture *));
>  
> -    err = vaapi_encode_issue(avctx, pic);
> -    if (err < 0) {
> -        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> -        return err;
> +        err = vaapi_encode_issue(avctx, pic);
> +        if (err < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +            return err;
> +        }
> +
> +        av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>      }
>  
> +    if (!av_fifo_size(ctx->encode_fifo))
> +        return err;
> +
> +    av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +    ctx->encode_order = pic->encode_order + 1;
> +
>      err = vaapi_encode_output(avctx, pic, pkt);
>      if (err < 0) {
>          av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
> @@ -2520,6 +2533,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>          }
>      }
>  
> +    ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> +                                      sizeof(VAAPIEncodePicture *));
> +    if (!ctx->encode_fifo)
> +        return AVERROR(ENOMEM);
> +
>      return 0;
>  
>  fail:
> @@ -2552,6 +2570,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>  
>      av_freep(&ctx->codec_sequence_params);
>      av_freep(&ctx->codec_picture_params);
> +    av_fifo_freep(&ctx->encode_fifo);
>  
>      av_buffer_unref(&ctx->recon_frames_ref);
>      av_buffer_unref(&ctx->input_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index b41604a883..89fe8de466 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -29,6 +29,7 @@
>  
>  #include "libavutil/hwcontext.h"
>  #include "libavutil/hwcontext_vaapi.h"
> +#include "libavutil/fifo.h"
>  
>  #include "avcodec.h"
>  #include "hwconfig.h"
> @@ -345,6 +346,8 @@ typedef struct VAAPIEncodeContext {
>      int             roi_warned;
>  
>      AVFrame         *frame;
> +
> +    AVFifoBuffer *encode_fifo;
>  } VAAPIEncodeContext;
>  
>  enum {
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async to increase performance
  2021-12-27  7:22 ` [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async " Xiang, Haihao
@ 2021-12-27  7:41   ` Chen, Wenbin
  0 siblings, 0 replies; 6+ messages in thread
From: Chen, Wenbin @ 2021-12-27  7:41 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Xiang, Haihao

> On Wed, 2021-10-27 at 16:57 +0800, Wenbin Chen wrote:
> > Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> > decreased. The reason is that vaRenderPicture() and vaSyncSurface() are
> > called at the same time (vaRenderPicture() is always followed by a
> > vaSyncSurface()). When we encode a stream with B frames, we need a buffer to
> > reorder frames, so we can send several frames to HW at once to increase
> > performance. Now I changed them to be called in an
> > asynchronous way, which will make better use of hardware.
> > 1080p transcoding increases about 17% fps on my environment.
> 
> Could you provide your command ? I'd like to have a try.
> 
> Thanks
> Haihao

Here is my command:
ffmpeg -hwaccel vaapi -hwaccel_output_format vaapi -i input.264 -c:v h264_vaapi output.264

Thanks
Wenbin

> 
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> >  libavcodec/vaapi_encode.c | 41 ++++++++++++++++++++++++++++-----------
> >  libavcodec/vaapi_encode.h |  3 +++
> >  2 files changed, 33 insertions(+), 11 deletions(-)
> >
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index ec054ae701..5927849233 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -951,8 +951,10 @@ static int
> vaapi_encode_pick_next(AVCodecContext *avctx,
> >      if (!pic && ctx->end_of_stream) {
> >          --b_counter;
> >          pic = ctx->pic_end;
> > -        if (pic->encode_issued)
> > +        if (pic->encode_complete)
> >              return AVERROR_EOF;
> > +        else if (pic->encode_issued)
> > +            return AVERROR(EAGAIN);
> >      }
> >
> >      if (!pic) {
> > @@ -1177,20 +1179,31 @@ int
> ff_vaapi_encode_receive_packet(AVCodecContext
> > *avctx, AVPacket *pkt)
> >              return AVERROR(EAGAIN);
> >      }
> >
> > -    pic = NULL;
> > -    err = vaapi_encode_pick_next(avctx, &pic);
> > -    if (err < 0)
> > -        return err;
> > -    av_assert0(pic);
> > +    while (av_fifo_size(ctx->encode_fifo) <= MAX_PICTURE_REFERENCES *
> > sizeof(VAAPIEncodePicture *)) {
> > +        pic = NULL;
> > +        err = vaapi_encode_pick_next(avctx, &pic);
> > +        if (err < 0)
> > +            break;
> > +        av_assert0(pic);
> >
> > -    pic->encode_order = ctx->encode_order++;
> > +        pic->encode_order = ctx->encode_order +
> > +                            (av_fifo_size(ctx->encode_fifo) /
> > sizeof(VAAPIEncodePicture *));
> >
> > -    err = vaapi_encode_issue(avctx, pic);
> > -    if (err < 0) {
> > -        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > -        return err;
> > +        err = vaapi_encode_issue(avctx, pic);
> > +        if (err < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +            return err;
> > +        }
> > +
> > +        av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> >      }
> >
> > +    if (!av_fifo_size(ctx->encode_fifo))
> > +        return err;
> > +
> > +    av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +    ctx->encode_order = pic->encode_order + 1;
> > +
> >      err = vaapi_encode_output(avctx, pic, pkt);
> >      if (err < 0) {
> >          av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
> > @@ -2520,6 +2533,11 @@ av_cold int
> ff_vaapi_encode_init(AVCodecContext *avctx)
> >          }
> >      }
> >
> > +    ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> > +                                      sizeof(VAAPIEncodePicture *));
> > +    if (!ctx->encode_fifo)
> > +        return AVERROR(ENOMEM);
> > +
> >      return 0;
> >
> >  fail:
> > @@ -2552,6 +2570,7 @@ av_cold int
> ff_vaapi_encode_close(AVCodecContext *avctx)
> >
> >      av_freep(&ctx->codec_sequence_params);
> >      av_freep(&ctx->codec_picture_params);
> > +    av_fifo_freep(&ctx->encode_fifo);
> >
> >      av_buffer_unref(&ctx->recon_frames_ref);
> >      av_buffer_unref(&ctx->input_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index b41604a883..89fe8de466 100644
> > --- a/libavcodec/vaapi_encode.h
> > +++ b/libavcodec/vaapi_encode.h
> > @@ -29,6 +29,7 @@
> >
> >  #include "libavutil/hwcontext.h"
> >  #include "libavutil/hwcontext_vaapi.h"
> > +#include "libavutil/fifo.h"
> >
> >  #include "avcodec.h"
> >  #include "hwconfig.h"
> > @@ -345,6 +346,8 @@ typedef struct VAAPIEncodeContext {
> >      int             roi_warned;
> >
> >      AVFrame         *frame;
> > +
> > +    AVFifoBuffer *encode_fifo;
> >  } VAAPIEncodeContext;
> >
> >  enum {
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async to increase performance
       [not found] <20211027085705.4114165-1-wenbin.chen@intel.com>
       [not found] ` <20211027085705.4114165-3-wenbin.chen@intel.com>
  2021-12-27  7:22 ` [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async " Xiang, Haihao
@ 2021-12-27 18:31 ` Mark Thompson
  2021-12-28  2:05   ` Chen, Wenbin
  2 siblings, 1 reply; 6+ messages in thread
From: Mark Thompson @ 2021-12-27 18:31 UTC (permalink / raw)
  To: ffmpeg-devel

On 27/10/2021 09:57, Wenbin Chen wrote:
> Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> decreased. The reason is that vaRenderPicture() and vaSyncSurface() are
> called at the same time (vaRenderPicture() is always followed by a
> vaSyncSurface()). When we encode a stream with B frames, we need a buffer to
> reorder frames, so we can send several frames to HW at once to increase
> performance. Now I changed them to be called in an
> asynchronous way, which will make better use of hardware.
> 1080p transcoding increases about 17% fps on my environment.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>   libavcodec/vaapi_encode.c | 41 ++++++++++++++++++++++++++++-----------
>   libavcodec/vaapi_encode.h |  3 +++
>   2 files changed, 33 insertions(+), 11 deletions(-)

The API does not allow this behaviour.

For some bizarre reason (I think a badly-written example combined with the Intel driver being synchronous in vaEndPicture() for a long time), the sync to a surface is to the /input/ surface of an encode rather than the output surface.

That means you can't have multiple encodes outstanding on the same surface and expect to sync usefully, because the only argument to vaSyncSurface() is the surface to sync to without anything about the associated context.

Therefore trying to make it asynchronous like this falls down when input surfaces might appear multiple times, or might be used in the input of multiple encoders, because you can't tell whether your sync means the thing you actually wanted to finish has finished.

(The commit you point to above as having decreased performance fixed this bug, since it became much more visible with decoupled send/receive.)

So: put this change after the switch to syncing on output buffers (since that operation does make sense for this), and leave the existing behaviour for cases where you have to sync on the input surface.

- Mark
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async to increase performance
  2021-12-27 18:31 ` Mark Thompson
@ 2021-12-28  2:05   ` Chen, Wenbin
  0 siblings, 0 replies; 6+ messages in thread
From: Chen, Wenbin @ 2021-12-28  2:05 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

> On 27/10/2021 09:57, Wenbin Chen wrote:
> > Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
> > decreased. The reason is that vaRenderPicture() and vaSyncSurface() are
> > called at the same time (vaRenderPicture() is always followed by a
> > vaSyncSurface()). When we encode a stream with B frames, we need a buffer to
> > reorder frames, so we can send several frames to HW at once to increase
> > performance. Now I changed them to be called in an
> > asynchronous way, which will make better use of hardware.
> > 1080p transcoding increases about 17% fps on my environment.
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> >   libavcodec/vaapi_encode.c | 41 ++++++++++++++++++++++++++++-----------
> >   libavcodec/vaapi_encode.h |  3 +++
> >   2 files changed, 33 insertions(+), 11 deletions(-)
> 
> The API does not allow this behaviour.
> 
> For some bizarre reason (I think a badly-written example combined with the
> Intel driver being synchronous in vaEndPicture() for a long time), the sync to
> a surface is to the /input/ surface of an encode rather than the output
> surface.
> 
> That means you can't have multiple encodes outstanding on the same
> surface and expect to sync usefully, because the only argument to
> vaSyncSurface() is the surface to sync to without anything about the
> associated context.
> 
> Therefore trying to make it asynchronous like this falls down when input
> surfaces might appear multiple times, or might be used in the input of
> multiple encoders, because you can't tell whether your sync means the thing
> you actually wanted to finish has finished.
> 
> (The commit you point to above as having decreased performance fixed this
> bug, since it became much more visible with decoupled send/receive.)
> 
> So: put this change after the switch to syncing on output buffers (since that
> operation does make sense for this), and leave the existing behaviour for
> cases where you have to sync on the input surface.
> 
> - Mark

Thanks for your advice. It makes sense to me. I will update the patches.

Best Regards
Wenbin
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-12-28  2:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20211027085705.4114165-1-wenbin.chen@intel.com>
     [not found] ` <20211027085705.4114165-3-wenbin.chen@intel.com>
     [not found]   ` <DM4PR11MB52946A90C16BC74C6A6DF492F88A9@DM4PR11MB5294.namprd11.prod.outlook.com>
2021-12-24 23:22     ` [FFmpeg-devel] [PATCH 3/3] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance Ed Martin
2021-12-25  5:49       ` Dennis Mungai
2021-12-27  7:22 ` [FFmpeg-devel] [PATCH 1/3] libavcodec/vaapi_encode: Change the way to call async " Xiang, Haihao
2021-12-27  7:41   ` Chen, Wenbin
2021-12-27 18:31 ` Mark Thompson
2021-12-28  2:05   ` Chen, Wenbin

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git