* [FFmpeg-devel] [PATCH v5 2/2] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
2022-02-18 3:07 [FFmpeg-devel] [PATCH v5 1/2] libavcodec/vaapi_encode: Add new API adaption to vaapi_encode Wenbin Chen
@ 2022-02-18 3:07 ` Wenbin Chen
2022-02-21 6:52 ` [FFmpeg-devel] [PATCH v5 1/2] libavcodec/vaapi_encode: Add new API adaption to vaapi_encode Xiang, Haihao
1 sibling, 0 replies; 3+ messages in thread
From: Wenbin Chen @ 2022-02-18 3:07 UTC (permalink / raw)
To: ffmpeg-devel
Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
always called together (each vaRenderPicture() is immediately followed by a
vaSyncBuffer()). They are now called in an asynchronous way,
which makes better use of the hardware.
The async_depth option is added to increase the encoder's performance. The
frames that are sent to the hardware are stored in a FIFO. The encoder will
sync output once the async FIFO is full.
Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
---
doc/encoders.texi | 6 ++++
libavcodec/vaapi_encode.c | 64 +++++++++++++++++++++++++++++++--------
libavcodec/vaapi_encode.h | 16 ++++++++--
3 files changed, 71 insertions(+), 15 deletions(-)
diff --git a/doc/encoders.texi b/doc/encoders.texi
index bfb6c7eef6..6bac2b7f28 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3600,6 +3600,12 @@ will refer only to P- or I-frames. When set to greater values multiple layers
of B-frames will be present, frames in each layer only referring to frames in
higher layers.
+@item async_depth
+Maximum processing parallelism. Increase this to improve single channel
+performance. This option has no effect if the driver doesn't implement the
+vaSyncBuffer function. Please make sure there are enough hw_frames allocated
+if a large async_depth value is used.
+
@item rc_mode
Set the rate control mode to use. A given driver may only support a subset of
modes.
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 3f8c8ace2a..8c6e881702 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -965,8 +965,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
if (!pic && ctx->end_of_stream) {
--b_counter;
pic = ctx->pic_end;
- if (pic->encode_issued)
+ if (pic->encode_complete)
return AVERROR_EOF;
+ else if (pic->encode_issued)
+ return AVERROR(EAGAIN);
}
if (!pic) {
@@ -1137,7 +1139,8 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
if (ctx->input_order == ctx->decode_delay)
ctx->dts_pts_diff = pic->pts - ctx->first_pts;
if (ctx->output_delay > 0)
- ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
+ ctx->ts_ring[ctx->input_order %
+ (3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
pic->display_order = ctx->input_order;
++ctx->input_order;
@@ -1191,18 +1194,47 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
return AVERROR(EAGAIN);
}
- pic = NULL;
- err = vaapi_encode_pick_next(avctx, &pic);
- if (err < 0)
- return err;
- av_assert0(pic);
+ if (ctx->has_sync_buffer_func) {
+ pic = NULL;
+
+ if (av_fifo_can_write(ctx->encode_fifo)) {
+ err = vaapi_encode_pick_next(avctx, &pic);
+ if (!err) {
+ av_assert0(pic);
+ pic->encode_order = ctx->encode_order +
+ av_fifo_can_read(ctx->encode_fifo);
+ err = vaapi_encode_issue(avctx, pic);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+ return err;
+ }
+ av_fifo_write(ctx->encode_fifo, &pic, 1);
+ }
+ }
- pic->encode_order = ctx->encode_order++;
+ if (!av_fifo_can_read(ctx->encode_fifo))
+ return err;
- err = vaapi_encode_issue(avctx, pic);
- if (err < 0) {
- av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
- return err;
+ // More frames can be buffered
+ if (av_fifo_can_write(ctx->encode_fifo) && !ctx->end_of_stream)
+ return AVERROR(EAGAIN);
+
+ av_fifo_read(ctx->encode_fifo, &pic, 1);
+ ctx->encode_order = pic->encode_order + 1;
+ } else {
+ pic = NULL;
+ err = vaapi_encode_pick_next(avctx, &pic);
+ if (err < 0)
+ return err;
+ av_assert0(pic);
+
+ pic->encode_order = ctx->encode_order++;
+
+ err = vaapi_encode_issue(avctx, pic);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+ return err;
+ }
}
err = vaapi_encode_output(avctx, pic, pkt);
@@ -1220,7 +1252,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
} else {
pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
- (3 * ctx->output_delay)];
+ (3 * ctx->output_delay + ctx->async_depth)];
}
av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
pkt->pts, pkt->dts);
@@ -2541,6 +2573,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0);
if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
ctx->has_sync_buffer_func = 1;
+ ctx->encode_fifo = av_fifo_alloc2(ctx->async_depth,
+ sizeof(VAAPIEncodePicture *),
+ 0);
+ if (!ctx->encode_fifo)
+ return AVERROR(ENOMEM);
}
#endif
@@ -2581,6 +2618,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
av_freep(&ctx->codec_sequence_params);
av_freep(&ctx->codec_picture_params);
+ av_fifo_freep2(&ctx->encode_fifo);
av_buffer_unref(&ctx->recon_frames_ref);
av_buffer_unref(&ctx->input_frames_ref);
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 29d9e9b91c..1b40819c69 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -29,6 +29,7 @@
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_vaapi.h"
+#include "libavutil/fifo.h"
#include "avcodec.h"
#include "hwconfig.h"
@@ -47,6 +48,7 @@ enum {
MAX_TILE_ROWS = 22,
// A.4.1: table A.6 allows at most 20 tile columns for any level.
MAX_TILE_COLS = 20,
+ MAX_ASYNC_DEPTH = 64,
};
extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
@@ -297,7 +299,8 @@ typedef struct VAAPIEncodeContext {
// Timestamp handling.
int64_t first_pts;
int64_t dts_pts_diff;
- int64_t ts_ring[MAX_REORDER_DELAY * 3];
+ int64_t ts_ring[MAX_REORDER_DELAY * 3 +
+ MAX_ASYNC_DEPTH];
// Slice structure.
int slice_block_rows;
@@ -348,6 +351,10 @@ typedef struct VAAPIEncodeContext {
// Whether the driver support vaSyncBuffer
int has_sync_buffer_func;
+ // Store buffered pic
+ AVFifo *encode_fifo;
+ // Max number of frame buffered in encoder.
+ int async_depth;
} VAAPIEncodeContext;
enum {
@@ -458,7 +465,12 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
{ "b_depth", \
"Maximum B-frame reference depth", \
OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
- { .i64 = 1 }, 1, INT_MAX, FLAGS }
+ { .i64 = 1 }, 1, INT_MAX, FLAGS }, \
+ { "async_depth", "Maximum processing parallelism. " \
+ "Increase this to improve single channel performance. This option " \
+ "doesn't work if driver doesn't implement vaSyncBuffer function.", \
+ OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
+ { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }
#define VAAPI_ENCODE_RC_MODE(name, desc) \
{ #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
--
2.32.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH v5 1/2] libavcodec/vaapi_encode: Add new API adaption to vaapi_encode
2022-02-18 3:07 [FFmpeg-devel] [PATCH v5 1/2] libavcodec/vaapi_encode: Add new API adaption to vaapi_encode Wenbin Chen
2022-02-18 3:07 ` [FFmpeg-devel] [PATCH v5 2/2] libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance Wenbin Chen
@ 2022-02-21 6:52 ` Xiang, Haihao
1 sibling, 0 replies; 3+ messages in thread
From: Xiang, Haihao @ 2022-02-21 6:52 UTC (permalink / raw)
To: ffmpeg-devel
On Fri, 2022-02-18 at 11:07 +0800, Wenbin Chen wrote:
> Add vaSyncBuffer to the VAAPI encoder. The old API, vaSyncSurface, waits for
> a surface to complete. When a surface is used for multiple operations, it
> waits for all operations to finish. vaSyncBuffer only waits for one channel
> to finish.
>
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
> ---
> libavcodec/vaapi_encode.c | 32 +++++++++++++++++++++++++++-----
> libavcodec/vaapi_encode.h | 3 +++
> 2 files changed, 30 insertions(+), 5 deletions(-)
>
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 3bf379b1a0..3f8c8ace2a 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -150,11 +150,25 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
> "(input surface %#x).\n", pic->display_order,
> pic->encode_order, pic->input_surface);
>
> - vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface);
> - if (vas != VA_STATUS_SUCCESS) {
> - av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture completion: "
> - "%d (%s).\n", vas, vaErrorStr(vas));
> - return AVERROR(EIO);
> +#if VA_CHECK_VERSION(1, 9, 0)
> + if (ctx->has_sync_buffer_func) {
> + vas = vaSyncBuffer(ctx->hwctx->display,
> + pic->output_buffer,
> + VA_TIMEOUT_INFINITE);
> + if (vas != VA_STATUS_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to sync to output buffer
> completion: "
> + "%d (%s).\n", vas, vaErrorStr(vas));
> + return AVERROR(EIO);
> + }
> + } else
> +#endif
> + { // If vaSyncBuffer is not implemented, try old version API.
> + vas = vaSyncSurface(ctx->hwctx->display, pic->input_surface);
> + if (vas != VA_STATUS_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to sync to picture
> completion: "
> + "%d (%s).\n", vas, vaErrorStr(vas));
> + return AVERROR(EIO);
> + }
> }
>
> // Input is definitely finished with now.
> @@ -2522,6 +2536,14 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
> }
> }
>
> +#if VA_CHECK_VERSION(1, 9, 0)
> + // check vaSyncBuffer function
> + vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0);
> + if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> + ctx->has_sync_buffer_func = 1;
> + }
> +#endif
> +
> return 0;
>
> fail:
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index b41604a883..29d9e9b91c 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -345,6 +345,9 @@ typedef struct VAAPIEncodeContext {
> int roi_warned;
>
> AVFrame *frame;
> +
> + // Whether the driver support vaSyncBuffer
> + int has_sync_buffer_func;
> } VAAPIEncodeContext;
>
> enum {
Patchset LGTM and I tested it with the i965 driver on SkyLake and the iHD driver on DG1.
Will apply.
-Haihao
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread