* [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically
@ 2024-03-28 1:26 fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com
` (6 more replies)
0 siblings, 7 replies; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++-------
libavcodec/vaapi_decode.h | 7 ++-----
2 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index cca94b5336..1b1972a2a9 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -38,12 +38,23 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
{
VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
VAStatus vas;
- VABufferID buffer;
- av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
+ av_assert0(pic->nb_param_buffers <= pic->param_allocated);
+    if (pic->nb_param_buffers == pic->param_allocated) {
+        // Grow the array through a temporary: if the reallocation fails,
+        // pic->param_buffers must keep pointing at the existing array
+        // (which still holds the buffer IDs created so far) instead of
+        // being overwritten with NULL and leaked.
+        VABufferID *tmp =
+            av_realloc_array(pic->param_buffers,
+                             pic->param_allocated + 16,
+                             sizeof(*pic->param_buffers));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+
+        pic->param_buffers    = tmp;
+        pic->param_allocated += 16;
+    }
+ av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated);
vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
- type, size, 1, (void*)data, &buffer);
+ type, size, 1, (void*)data,
+ &pic->param_buffers[pic->nb_param_buffers]);
if (vas != VA_STATUS_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Failed to create parameter "
"buffer (type %d): %d (%s).\n",
@@ -51,14 +62,14 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
return AVERROR(EIO);
}
- pic->param_buffers[pic->nb_param_buffers++] = buffer;
-
av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes) "
- "is %#x.\n", type, size, buffer);
+ "is %#x.\n", type, size, pic->param_buffers[pic->nb_param_buffers]);
+
+ ++pic->nb_param_buffers;
+
return 0;
}
-
int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx,
VAAPIDecodePicture *pic,
const void *params_data,
@@ -215,6 +226,8 @@ fail:
fail_at_end:
exit:
pic->nb_param_buffers = 0;
+ pic->param_allocated = 0;
+ av_freep(&pic->param_buffers);
pic->nb_slices = 0;
pic->slices_allocated = 0;
av_freep(&pic->slice_buffers);
@@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext *avctx,
ff_vaapi_decode_destroy_buffers(avctx, pic);
pic->nb_param_buffers = 0;
+ pic->param_allocated = 0;
+ av_freep(&pic->param_buffers);
pic->nb_slices = 0;
pic->slices_allocated = 0;
av_freep(&pic->slice_buffers);
diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h
index 6beda14e52..a41d7ff2ff 100644
--- a/libavcodec/vaapi_decode.h
+++ b/libavcodec/vaapi_decode.h
@@ -32,15 +32,12 @@ static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic)
return (uintptr_t)pic->data[3];
}
-enum {
- MAX_PARAM_BUFFERS = 16,
-};
-
typedef struct VAAPIDecodePicture {
VASurfaceID output_surface;
int nb_param_buffers;
- VABufferID param_buffers[MAX_PARAM_BUFFERS];
+ VABufferID *param_buffers;
+ int param_allocated;
int nb_slices;
VABufferID *slice_buffers;
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com
` (5 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
So that the hardware decoder can use the flags too.
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/vvc/vvc_refs.c | 4 ----
libavcodec/vvc/vvc_refs.h | 5 +++++
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c
index afcfc09da7..bf70777550 100644
--- a/libavcodec/vvc/vvc_refs.c
+++ b/libavcodec/vvc/vvc_refs.c
@@ -28,10 +28,6 @@
#include "vvc_refs.h"
-#define VVC_FRAME_FLAG_OUTPUT (1 << 0)
-#define VVC_FRAME_FLAG_SHORT_REF (1 << 1)
-#define VVC_FRAME_FLAG_LONG_REF (1 << 2)
-#define VVC_FRAME_FLAG_BUMPING (1 << 3)
typedef struct FrameProgress {
atomic_int progress[VVC_PROGRESS_LAST];
diff --git a/libavcodec/vvc/vvc_refs.h b/libavcodec/vvc/vvc_refs.h
index eba4422fb4..509fc6af22 100644
--- a/libavcodec/vvc/vvc_refs.h
+++ b/libavcodec/vvc/vvc_refs.h
@@ -25,6 +25,11 @@
#include "vvcdec.h"
+#define VVC_FRAME_FLAG_OUTPUT (1 << 0)
+#define VVC_FRAME_FLAG_SHORT_REF (1 << 1)
+#define VVC_FRAME_FLAG_LONG_REF (1 << 2)
+#define VVC_FRAME_FLAG_BUMPING (1 << 3)
+
int ff_vvc_output_frame(VVCContext *s, VVCFrameContext *fc, struct AVFrame *out, int no_output_of_prior_pics_flag, int flush);
void ff_vvc_bump_frame(VVCContext *s, VVCFrameContext *fc);
int ff_vvc_set_new_ref(VVCContext *s, VVCFrameContext *fc, struct AVFrame **frame);
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/cbs_h266.h | 1 +
libavcodec/cbs_h266_syntax_template.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
index 73d94157d4..19f83aeb49 100644
--- a/libavcodec/cbs_h266.h
+++ b/libavcodec/cbs_h266.h
@@ -593,6 +593,7 @@ typedef struct H266RawPPS {
uint16_t sub_pic_id_val[VVC_MAX_SLICES];
uint16_t col_width_val[VVC_MAX_TILE_COLUMNS];
uint16_t row_height_val[VVC_MAX_TILE_ROWS];
+ uint16_t slice_top_left_tile_idx[VVC_MAX_SLICES];
} H266RawPPS;
typedef struct H266RawAPS {
diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c
index 0aae9fdfd5..12f821b3fd 100644
--- a/libavcodec/cbs_h266_syntax_template.c
+++ b/libavcodec/cbs_h266_syntax_template.c
@@ -1945,6 +1945,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
else
infer(pps_tile_idx_delta_present_flag, 0);
for (i = 0; i < current->pps_num_slices_in_pic_minus1; i++) {
+ current->slice_top_left_tile_idx[i] = tile_idx;
tile_x = tile_idx % current->num_tile_columns;
tile_y = tile_idx / current->num_tile_columns;
if (tile_x != current->num_tile_columns - 1) {
@@ -2027,6 +2028,8 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
}
num_slices_in_tile = j;
}
+ for (int k = 0; k < num_slices_in_tile; k++)
+ current->slice_top_left_tile_idx[i + k] = tile_idx;
i += num_slices_in_tile - 1;
} else {
uint16_t height = 0;
@@ -2070,6 +2073,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
if (i == current->pps_num_slices_in_pic_minus1) {
uint16_t height = 0;
+ current->slice_top_left_tile_idx[i] = tile_idx;
tile_x = tile_idx % current->num_tile_columns;
tile_y = tile_idx / current->num_tile_columns;
if (tile_y >= current->num_tile_rows)
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile to H266RawPPS
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com
` (3 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/cbs_h266.h | 1 +
libavcodec/cbs_h266_syntax_template.c | 16 ++++++++++------
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
index 19f83aeb49..87bd39c6f5 100644
--- a/libavcodec/cbs_h266.h
+++ b/libavcodec/cbs_h266.h
@@ -594,6 +594,7 @@ typedef struct H266RawPPS {
uint16_t col_width_val[VVC_MAX_TILE_COLUMNS];
uint16_t row_height_val[VVC_MAX_TILE_ROWS];
uint16_t slice_top_left_tile_idx[VVC_MAX_SLICES];
+ uint16_t num_slices_in_tile[VVC_MAX_SLICES];
} H266RawPPS;
typedef struct H266RawAPS {
diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c
index 12f821b3fd..3f378d199e 100644
--- a/libavcodec/cbs_h266_syntax_template.c
+++ b/libavcodec/cbs_h266_syntax_template.c
@@ -1976,14 +1976,13 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
if (current->pps_slice_width_in_tiles_minus1[i] == 0 &&
current->pps_slice_height_in_tiles_minus1[i] == 0 &&
current->row_height_val[tile_y] > 1) {
- int num_slices_in_tile,
- uniform_slice_height, remaining_height_in_ctbs_y;
+ int uniform_slice_height, remaining_height_in_ctbs_y;
remaining_height_in_ctbs_y =
current->row_height_val[tile_y];
ues(pps_num_exp_slices_in_tile[i],
0, current->row_height_val[tile_y] - 1, 1, i);
if (current->pps_num_exp_slices_in_tile[i] == 0) {
- num_slices_in_tile = 1;
+ current->num_slices_in_tile[i] = 1;
current->slice_height_in_ctus[i] = current->row_height_val[tile_y];
slice_top_left_ctu_x[i] = ctu_x;
slice_top_left_ctu_y[i] = ctu_y;
@@ -2026,14 +2025,18 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
slice_top_left_ctu_y[i + j] = ctu_y;
j++;
}
- num_slices_in_tile = j;
+ current->num_slices_in_tile[i] = j;
}
- for (int k = 0; k < num_slices_in_tile; k++)
+ for (int k = 0; k < current->num_slices_in_tile[i]; k++)
current->slice_top_left_tile_idx[i + k] = tile_idx;
- i += num_slices_in_tile - 1;
+ i += current->num_slices_in_tile[i] - 1;
} else {
uint16_t height = 0;
infer(pps_num_exp_slices_in_tile[i], 0);
+ if (current->pps_slice_width_in_tiles_minus1[i] == 0 &&
+ current->pps_slice_height_in_tiles_minus1[i] == 0)
+ current->num_slices_in_tile[i] = 1;
+
for (j = 0;
j <= current->pps_slice_height_in_tiles_minus1[i];
j++) {
@@ -2074,6 +2077,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw,
uint16_t height = 0;
current->slice_top_left_tile_idx[i] = tile_idx;
+ current->num_slices_in_tile[i] = 1;
tile_x = tile_idx % current->num_tile_columns;
tile_y = tile_idx / current->num_tile_columns;
if (tile_y >= current->num_tile_rows)
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
` (2 preceding siblings ...)
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com
` (2 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/vvc/vvc_ps.c | 10 +++++++++-
libavcodec/vvc/vvc_ps.h | 1 +
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/libavcodec/vvc/vvc_ps.c b/libavcodec/vvc/vvc_ps.c
index 7972803da6..97eef85be9 100644
--- a/libavcodec/vvc/vvc_ps.c
+++ b/libavcodec/vvc/vvc_ps.c
@@ -873,13 +873,21 @@ static void alf_derive(VVCALF *alf, const H266RawAPS *aps)
alf_cc(alf, aps);
}
+/**
+ * Free callback passed to ff_refstruct_alloc_ext() for a VVCALF.
+ *
+ * Releases the reference stored in the r member of the VVCALF given
+ * as obj. The opaque argument is not used.
+ */
+static void alf_free(FFRefStructOpaque unused, void *obj)
+{
+    VVCALF *const a = obj;
+
+    ff_refstruct_unref(&a->r);
+}
+
static int aps_decode_alf(const VVCALF **alf, const H266RawAPS *aps)
{
- VVCALF *a = ff_refstruct_allocz(sizeof(*a));
+ VVCALF *a = ff_refstruct_alloc_ext(sizeof(*a), 0, NULL, alf_free);
if (!a)
return AVERROR(ENOMEM);
alf_derive(a, aps);
+ ff_refstruct_replace(&a->r, aps);
ff_refstruct_replace(alf, a);
ff_refstruct_unref(&a);
diff --git a/libavcodec/vvc/vvc_ps.h b/libavcodec/vvc/vvc_ps.h
index 1164d0eab6..d306e0354a 100644
--- a/libavcodec/vvc/vvc_ps.h
+++ b/libavcodec/vvc/vvc_ps.h
@@ -159,6 +159,7 @@ typedef struct VVCPH {
#define ALF_NUM_COEFF_CC 7
typedef struct VVCALF {
+ const H266RawAPS *r;
int16_t luma_coeff [ALF_NUM_FILTERS_LUMA][ALF_NUM_COEFF_LUMA];
uint8_t luma_clip_idx [ALF_NUM_FILTERS_LUMA][ALF_NUM_COEFF_LUMA];
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
` (3 preceding siblings ...)
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-03-28 2:04 ` Andreas Rheinhardt
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com
2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson
6 siblings, 1 reply; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
libavcodec/vvc/vvc_refs.c | 6 ++++
libavcodec/vvc/vvcdec.c | 67 +++++++++++++++++++++++++++++++++++----
libavcodec/vvc/vvcdec.h | 5 +++
3 files changed, 72 insertions(+), 6 deletions(-)
diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c
index bf70777550..c9f89a5a0a 100644
--- a/libavcodec/vvc/vvc_refs.c
+++ b/libavcodec/vvc/vvc_refs.c
@@ -25,6 +25,7 @@
#include "libavutil/thread.h"
#include "libavcodec/refstruct.h"
#include "libavcodec/thread.h"
+#include "libavcodec/decode.h"
#include "vvc_refs.h"
@@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags)
ff_refstruct_unref(&frame->rpl_tab);
frame->collocated_ref = NULL;
+ ff_refstruct_unref(&frame->hwaccel_picture_private);
}
}
@@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc)
if (!frame->progress)
goto fail;
+ ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame->hwaccel_picture_private);
+ if (ret < 0)
+ goto fail;
+
return frame;
fail:
ff_vvc_unref_frame(fc, frame, ~0);
diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
index d5704aca25..f2e269ce76 100644
--- a/libavcodec/vvc/vvcdec.c
+++ b/libavcodec/vvc/vvcdec.c
@@ -24,6 +24,8 @@
#include "libavcodec/decode.h"
#include "libavcodec/profiles.h"
#include "libavcodec/refstruct.h"
+#include "libavcodec/hwconfig.h"
+#include "libavcodec/hwaccel_internal.h"
#include "libavutil/cpu.h"
#include "libavutil/thread.h"
@@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src)
ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab);
ff_refstruct_replace(&dst->rpl, src->rpl);
+ ff_refstruct_replace(&dst->hwaccel_picture_private,
+ src->hwaccel_picture_private);
dst->nb_rpl_elems = src->nb_rpl_elems;
dst->poc = src->poc;
@@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc,
return 0;
}
+/*
+ * Build the list of candidate pixel formats for the stream described by
+ * sps and return the result of ff_get_format() on that list.
+ *
+ * As written here the list holds only sps->pix_fmt followed by the
+ * AV_PIX_FMT_NONE terminator: HWACCEL_MAX is 0 and both switch cases
+ * are empty.
+ */
+static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps)
+{
+#define HWACCEL_MAX 0
+
+ // Room for HWACCEL_MAX entries (presumably hwaccel formats - none yet),
+ // plus sps->pix_fmt and the AV_PIX_FMT_NONE terminator.
+ enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
+
+ // Both cases are currently empty, so nothing is put ahead of
+ // sps->pix_fmt in the list.
+ switch (sps->pix_fmt) {
+ case AV_PIX_FMT_YUV420P:
+ break;
+ case AV_PIX_FMT_YUV420P10:
+ break;
+ }
+
+ *fmt++ = sps->pix_fmt;
+ *fmt = AV_PIX_FMT_NONE;
+
+ return ff_get_format(avctx, pix_fmts);
+}
+
static void export_frame_params(VVCContext *s, const VVCFrameContext *fc)
{
AVCodecContext *c = s->avctx;
const VVCSPS *sps = fc->ps.sps;
const VVCPPS *pps = fc->ps.pps;
- c->pix_fmt = sps->pix_fmt;
- c->coded_width = pps->width;
- c->coded_height = pps->height;
- c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]);
- c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]);
+ // Reset HW config if pix_fmt/w/h change.
+ if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width || c->coded_height != pps->height) {
+ c->coded_width = pps->width;
+ c->coded_height = pps->height;
+ c->pix_fmt = get_format(c, sps);
+ s->pix_fmt = sps->pix_fmt;
+ }
+
+ c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]);
+ c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]);
}
static int frame_setup(VVCFrameContext *fc, VVCContext *s)
@@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s, VVCFrameContext *fc, const H2645NAL *nal,
ret = slice_init_entry_points(sc, fc, nal, unit);
if (ret < 0)
return ret;
+
+ if (s->avctx->hwaccel) {
+ if (is_first_slice) {
+ ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = FF_HW_CALL(s->avctx, decode_slice,
+ nal->raw_data, nal->raw_size);
+ if (ret < 0)
+ return ret;
+ }
+
fc->nb_slices++;
return 0;
@@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output)
static int submit_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *output, int *got_output)
{
int ret;
+
+    if (s->avctx->hwaccel) {
+        // The assignment must be parenthesized: "<" binds tighter than
+        // "=", so without the inner parentheses ret would receive the
+        // result of the comparison (0 or 1) and the error path below
+        // would return 1 instead of the hwaccel's negative error code.
+        if ((ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame)) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                "Hardware accelerator failed to decode picture\n");
+            ff_vvc_unref_frame(fc, fc->ref, ~0);
+            return ret;
+        }
+    } else
+        ff_vvc_frame_submit(s, fc);
+
s->nb_frames++;
s->nb_delayed++;
- ff_vvc_frame_submit(s, fc);
+
if (s->nb_delayed >= s->nb_fcs) {
if ((ret = wait_delayed_frame(s, output, got_output)) < 0)
return ret;
@@ -1027,6 +1080,8 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx)
GDR_SET_RECOVERED(s);
ff_thread_once(&init_static_once, init_default_scale_m);
+ s->pix_fmt = AV_PIX_FMT_NONE;
+
return 0;
}
diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h
index aa3d715524..009d57424e 100644
--- a/libavcodec/vvc/vvcdec.h
+++ b/libavcodec/vvc/vvcdec.h
@@ -78,6 +78,9 @@ typedef struct VVCFrame {
* A combination of VVC_FRAME_FLAG_*
*/
uint8_t flags;
+
+ AVBufferRef *hwaccel_priv_buf;
+ void *hwaccel_picture_private; ///< hardware accelerator private data
} VVCFrame;
typedef struct SliceContext {
@@ -224,6 +227,8 @@ typedef struct VVCContext {
uint64_t nb_frames; ///< processed frames
int nb_delayed; ///< delayed frames
+
+ enum AVPixelFormat pix_fmt; ///< pix format of current frame
} VVCContext ;
#endif /* AVCODEC_VVC_VVCDEC_H */
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
` (4 preceding siblings ...)
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com
@ 2024-03-28 1:26 ` fei.w.wang-at-intel.com
2024-04-02 12:48 ` Nuo Mi
2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson
6 siblings, 1 reply; 14+ messages in thread
From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: fei.w.wang
From: Fei Wang <fei.w.wang@intel.com>
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
Changelog | 4 +
configure | 3 +
libavcodec/Makefile | 1 +
libavcodec/hwaccels.h | 1 +
libavcodec/vaapi_decode.c | 7 +
libavcodec/vaapi_vvc.c | 657 ++++++++++++++++++++++++++++++++++++++
libavcodec/version.h | 2 +-
libavcodec/vvc/vvcdec.c | 15 +-
8 files changed, 688 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/vaapi_vvc.c
diff --git a/Changelog b/Changelog
index e83a00e35c..3108e65558 100644
--- a/Changelog
+++ b/Changelog
@@ -1,6 +1,10 @@
Entries are sorted chronologically from oldest to youngest within each release,
releases are sorted from youngest to oldest.
+version <next>:
+- VVC VAAPI decoder
+
+
version 7.0:
- DXV DXT1 encoder
- LEAD MCMP decoder
diff --git a/configure b/configure
index 2a1d22310b..d902c9adc8 100755
--- a/configure
+++ b/configure
@@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9"
vp9_vdpau_hwaccel_select="vp9_decoder"
vp9_videotoolbox_hwaccel_deps="videotoolbox"
vp9_videotoolbox_hwaccel_select="vp9_decoder"
+vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC"
+vvc_vaapi_hwaccel_select="vvc_decoder"
wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel"
wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel"
@@ -7250,6 +7252,7 @@ if enabled vaapi; then
check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, 0)"
check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC"
+ check_type "va/va.h va/va_dec_vvc.h" "VAPictureParameterBufferVVC"
check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth
check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx
check_type "va/va.h va/va_vpp.h" "VAProcFilterParameterBufferHDRToneMapping"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9ce6d445c1..343b037636 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
+OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 5171e4c7d7..88d6b9a9b5 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -82,6 +82,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel;
extern const struct FFHWAccel ff_vp9_vaapi_hwaccel;
extern const struct FFHWAccel ff_vp9_vdpau_hwaccel;
extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_vvc_vaapi_hwaccel;
extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel;
extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel;
extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel;
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index 1b1972a2a9..ceeb1f1a12 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -455,6 +455,9 @@ static const struct {
MAP(AV1, AV1_MAIN, AV1Profile0),
MAP(AV1, AV1_HIGH, AV1Profile1),
#endif
+#if VA_CHECK_VERSION(1, 22, 0)
+ MAP(H266, VVC_MAIN_10, VVCMain10),
+#endif
#undef MAP
};
@@ -627,6 +630,10 @@ static int vaapi_decode_make_config(AVCodecContext *avctx,
case AV_CODEC_ID_VP8:
frames->initial_pool_size += 3;
break;
+ case AV_CODEC_ID_H266:
+ // Add 32 surfaces; 16 of them are additional ones for the maximum
+ // 16 frames delay in the vvc native decoder.
+ frames->initial_pool_size += 32;
+ break;
default:
frames->initial_pool_size += 2;
}
diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c
new file mode 100644
index 0000000000..6141005688
--- /dev/null
+++ b/libavcodec/vaapi_vvc.c
@@ -0,0 +1,657 @@
+/*
+ * VVC HW decode acceleration through VA API
+ *
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_dec_vvc.h>
+
+#include "vvc/vvcdec.h"
+#include "vvc/vvc_refs.h"
+#include "hwaccel_internal.h"
+#include "vaapi_decode.h"
+
+/**
+ * Per-frame private data of the VVC VA-API hwaccel.
+ */
+typedef struct VAAPIDecodePictureVVC {
+ /* Common VA-API decode picture state handed to the ff_vaapi_decode_*() helpers. */
+ VAAPIDecodePicture pic;
+ /* Picture parameters, filled in vaapi_vvc_start_frame(). */
+ VAPictureParameterBufferVVC pic_param;
+ /* Slice parameters, overwritten for every slice in vaapi_vvc_decode_slice(). */
+ VASliceParameterBufferVVC slice_param;
+ /* Set to 1 in vaapi_vvc_end_frame() after ff_vaapi_decode_issue() succeeded;
+  * read in fill_vaapi_reference_frames() to flag references as unavailable. */
+ int decode_issued;
+} VAAPIDecodePictureVVC;
+
+/**
+ * Reset a VA picture entry to the "no picture" state: invalid surface id,
+ * the VA_PICTURE_VVC_INVALID flag and a picture order count of zero.
+ */
+static void init_vaapi_pic(VAPictureVVC *va_pic)
+{
+    va_pic->pic_order_cnt = 0;
+    va_pic->flags         = VA_PICTURE_VVC_INVALID;
+    va_pic->picture_id    = VA_INVALID_ID;
+}
+
+/**
+ * Describe a decoder frame as a VA picture: surface id, picture order count
+ * and, for long-term references, the VA_PICTURE_VVC_LONG_TERM_REFERENCE flag.
+ */
+static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame *pic)
+{
+    const int is_long_term = pic->flags & VVC_FRAME_FLAG_LONG_REF;
+
+    va_pic->picture_id    = ff_vaapi_get_surface_id(pic->frame);
+    va_pic->pic_order_cnt = pic->poc;
+    va_pic->flags         = is_long_term ? VA_PICTURE_VVC_LONG_TERM_REFERENCE : 0;
+}
+
+/**
+ * Fill pp->ReferenceFrames from the DPB of the frame context.
+ *
+ * Every DPB entry other than the current picture that is marked as a short-
+ * or long-term reference takes the next free slot, in DPB order. Slots left
+ * over stay in the invalid state set by init_vaapi_pic(). A reference whose
+ * decode has not been issued yet is additionally flagged with
+ * VA_PICTURE_VVC_UNAVAILABLE_REFERENCE.
+ */
+static void fill_vaapi_reference_frames(const VVCFrameContext *h, VAPictureParameterBufferVVC *pp)
+{
+    const VVCFrame *cur = h->ref;
+    int dpb_idx = 0;
+
+    for (int slot = 0; slot < FF_ARRAY_ELEMS(pp->ReferenceFrames); slot++) {
+        VAPictureVVC *va_ref = &pp->ReferenceFrames[slot];
+        const VVCFrame *ref = NULL;
+        VAAPIDecodePictureVVC *ref_pic;
+
+        /* Continue scanning the DPB where the previous slot stopped. */
+        for (; !ref && dpb_idx < FF_ARRAY_ELEMS(h->DPB); dpb_idx++) {
+            const VVCFrame *cand = &h->DPB[dpb_idx];
+
+            if (cand != cur &&
+                (cand->flags & (VVC_FRAME_FLAG_LONG_REF | VVC_FRAME_FLAG_SHORT_REF)))
+                ref = cand;
+        }
+
+        init_vaapi_pic(va_ref);
+        if (!ref)
+            continue;
+
+        fill_vaapi_pic(va_ref, ref);
+        ref_pic = ref->hwaccel_picture_private;
+        if (!ref_pic->decode_issued)
+            va_ref->flags |= VA_PICTURE_VVC_UNAVAILABLE_REFERENCE;
+    }
+}
+
+/**
+ * Build and submit all per-picture VA-API parameter buffers for the current
+ * VVC frame.
+ *
+ * Fills the picture parameter buffer from the active SPS, PPS and picture
+ * header, then creates one buffer per subpicture, per non-NULL ALF / LMCS /
+ * scaling-list entry of the decoder's parameter sets, per explicit tile
+ * column and row, and (for rectangular slices) per slice-structure entry.
+ *
+ * @param avctx  codec context; priv_data is read as a VVCContext
+ * @param buffer unused
+ * @param size   unused
+ * @return 0 on success; on failure the pending picture is cancelled with
+ *         ff_vaapi_decode_cancel() and the error returned by
+ *         ff_vaapi_decode_make_param_buffer() is passed on
+ */
+static int vaapi_vvc_start_frame(AVCodecContext *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t size)
+{
+    const VVCContext *h = avctx->priv_data;
+    VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs];
+    const H266RawSPS *sps = fc->ps.sps->r;
+    const H266RawPPS *pps = fc->ps.pps->r;
+    const H266RawPictureHeader *ph = fc->ps.ph.r;
+    VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
+    VAPictureParameterBufferVVC *pic_param = &pic->pic_param;
+    uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] = {0};
+    int i, j, k, err;
+
+    pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref->frame);
+
+    *pic_param = (VAPictureParameterBufferVVC) {
+        .pps_pic_width_in_luma_samples = pps->pps_pic_width_in_luma_samples,
+        .pps_pic_height_in_luma_samples = pps->pps_pic_height_in_luma_samples,
+        .sps_num_subpics_minus1 = sps->sps_num_subpics_minus1,
+        .sps_chroma_format_idc = sps->sps_chroma_format_idc,
+        .sps_bitdepth_minus8 = sps->sps_bitdepth_minus8,
+        .sps_log2_ctu_size_minus5 = sps->sps_log2_ctu_size_minus5,
+        .sps_log2_min_luma_coding_block_size_minus2 = sps->sps_log2_min_luma_coding_block_size_minus2,
+        .sps_log2_transform_skip_max_size_minus2 = sps->sps_log2_transform_skip_max_size_minus2,
+        .sps_six_minus_max_num_merge_cand = sps->sps_six_minus_max_num_merge_cand,
+        .sps_five_minus_max_num_subblock_merge_cand = sps->sps_five_minus_max_num_subblock_merge_cand,
+        .sps_max_num_merge_cand_minus_max_num_gpm_cand = sps->sps_max_num_merge_cand_minus_max_num_gpm_cand,
+        .sps_log2_parallel_merge_level_minus2 = sps->sps_log2_parallel_merge_level_minus2,
+        .sps_min_qp_prime_ts = sps->sps_min_qp_prime_ts,
+        .sps_six_minus_max_num_ibc_merge_cand = sps->sps_six_minus_max_num_ibc_merge_cand,
+        .sps_num_ladf_intervals_minus2 = sps->sps_num_ladf_intervals_minus2,
+        .sps_ladf_lowest_interval_qp_offset = sps->sps_ladf_lowest_interval_qp_offset,
+        .sps_flags.bits = {
+            .sps_subpic_info_present_flag = sps->sps_subpic_info_present_flag,
+            .sps_independent_subpics_flag = sps->sps_independent_subpics_flag,
+            .sps_subpic_same_size_flag = sps->sps_subpic_same_size_flag,
+            .sps_entropy_coding_sync_enabled_flag = sps->sps_entropy_coding_sync_enabled_flag,
+            .sps_qtbtt_dual_tree_intra_flag = sps->sps_qtbtt_dual_tree_intra_flag,
+            .sps_max_luma_transform_size_64_flag = sps->sps_max_luma_transform_size_64_flag,
+            .sps_transform_skip_enabled_flag = sps->sps_transform_skip_enabled_flag,
+            .sps_bdpcm_enabled_flag = sps->sps_bdpcm_enabled_flag,
+            .sps_mts_enabled_flag = sps->sps_mts_enabled_flag,
+            .sps_explicit_mts_intra_enabled_flag = sps->sps_explicit_mts_intra_enabled_flag,
+            .sps_explicit_mts_inter_enabled_flag = sps->sps_explicit_mts_inter_enabled_flag,
+            .sps_lfnst_enabled_flag = sps->sps_lfnst_enabled_flag,
+            .sps_joint_cbcr_enabled_flag = sps->sps_joint_cbcr_enabled_flag,
+            .sps_same_qp_table_for_chroma_flag = sps->sps_same_qp_table_for_chroma_flag,
+            .sps_sao_enabled_flag = sps->sps_sao_enabled_flag,
+            .sps_alf_enabled_flag = sps->sps_alf_enabled_flag,
+            /* CC-ALF has its own SPS flag; it must not mirror sps_alf_enabled_flag. */
+            .sps_ccalf_enabled_flag = sps->sps_ccalf_enabled_flag,
+            .sps_lmcs_enabled_flag = sps->sps_lmcs_enabled_flag,
+            .sps_sbtmvp_enabled_flag = sps->sps_sbtmvp_enabled_flag,
+            .sps_amvr_enabled_flag = sps->sps_amvr_enabled_flag,
+            .sps_smvd_enabled_flag = sps->sps_smvd_enabled_flag,
+            .sps_mmvd_enabled_flag = sps->sps_mmvd_enabled_flag,
+            .sps_sbt_enabled_flag = sps->sps_sbt_enabled_flag,
+            .sps_affine_enabled_flag = sps->sps_affine_enabled_flag,
+            .sps_6param_affine_enabled_flag = sps->sps_6param_affine_enabled_flag,
+            .sps_affine_amvr_enabled_flag = sps->sps_affine_amvr_enabled_flag,
+            .sps_affine_prof_enabled_flag = sps->sps_affine_prof_enabled_flag,
+            .sps_bcw_enabled_flag = sps->sps_bcw_enabled_flag,
+            .sps_ciip_enabled_flag = sps->sps_ciip_enabled_flag,
+            .sps_gpm_enabled_flag = sps->sps_gpm_enabled_flag,
+            .sps_isp_enabled_flag = sps->sps_isp_enabled_flag,
+            .sps_mrl_enabled_flag = sps->sps_mrl_enabled_flag,
+            .sps_mip_enabled_flag = sps->sps_mip_enabled_flag,
+            .sps_cclm_enabled_flag = sps->sps_cclm_enabled_flag,
+            .sps_chroma_horizontal_collocated_flag = sps->sps_chroma_horizontal_collocated_flag,
+            .sps_chroma_vertical_collocated_flag = sps->sps_chroma_vertical_collocated_flag,
+            .sps_palette_enabled_flag = sps->sps_palette_enabled_flag,
+            .sps_act_enabled_flag = sps->sps_act_enabled_flag,
+            .sps_ibc_enabled_flag = sps->sps_ibc_enabled_flag,
+            .sps_ladf_enabled_flag = sps->sps_ladf_enabled_flag,
+            .sps_explicit_scaling_list_enabled_flag = sps->sps_explicit_scaling_list_enabled_flag,
+            .sps_scaling_matrix_for_lfnst_disabled_flag = sps->sps_scaling_matrix_for_lfnst_disabled_flag,
+            .sps_scaling_matrix_for_alternative_colour_space_disabled_flag = sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag,
+            .sps_scaling_matrix_designated_colour_space_flag = sps->sps_scaling_matrix_designated_colour_space_flag,
+            .sps_virtual_boundaries_enabled_flag = sps->sps_virtual_boundaries_enabled_flag,
+            .sps_virtual_boundaries_present_flag = sps->sps_virtual_boundaries_present_flag,
+        },
+        /* Virtual boundary counts come from the SPS when it carries them,
+         * otherwise from the picture header. */
+        .NumVerVirtualBoundaries = sps->sps_virtual_boundaries_present_flag ?
+                                   sps->sps_num_ver_virtual_boundaries :
+                                   ph->ph_num_ver_virtual_boundaries,
+        .NumHorVirtualBoundaries = sps->sps_virtual_boundaries_present_flag ?
+                                   sps->sps_num_hor_virtual_boundaries :
+                                   ph->ph_num_hor_virtual_boundaries,
+        .pps_scaling_win_left_offset = pps->pps_scaling_win_left_offset,
+        .pps_scaling_win_right_offset = pps->pps_scaling_win_right_offset,
+        .pps_scaling_win_top_offset = pps->pps_scaling_win_top_offset,
+        .pps_scaling_win_bottom_offset = pps->pps_scaling_win_bottom_offset,
+        .pps_num_exp_tile_columns_minus1 = pps->pps_num_exp_tile_columns_minus1,
+        .pps_num_exp_tile_rows_minus1 = pps->pps_num_exp_tile_rows_minus1,
+        .pps_num_slices_in_pic_minus1 = pps->pps_num_slices_in_pic_minus1,
+        .pps_pic_width_minus_wraparound_offset = pps->pps_pic_width_minus_wraparound_offset,
+        .pps_cb_qp_offset = pps->pps_cb_qp_offset,
+        .pps_cr_qp_offset = pps->pps_cr_qp_offset,
+        .pps_joint_cbcr_qp_offset_value = pps->pps_joint_cbcr_qp_offset_value,
+        .pps_chroma_qp_offset_list_len_minus1 = pps->pps_chroma_qp_offset_list_len_minus1,
+        .pps_flags.bits = {
+            .pps_loop_filter_across_tiles_enabled_flag = pps->pps_loop_filter_across_tiles_enabled_flag,
+            .pps_rect_slice_flag = pps->pps_rect_slice_flag,
+            .pps_single_slice_per_subpic_flag = pps->pps_single_slice_per_subpic_flag,
+            .pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag,
+            .pps_weighted_pred_flag = pps->pps_weighted_pred_flag,
+            .pps_weighted_bipred_flag = pps->pps_weighted_bipred_flag,
+            .pps_ref_wraparound_enabled_flag = pps->pps_ref_wraparound_enabled_flag,
+            .pps_cu_qp_delta_enabled_flag = pps->pps_cu_qp_delta_enabled_flag,
+            .pps_cu_chroma_qp_offset_list_enabled_flag = pps->pps_cu_chroma_qp_offset_list_enabled_flag,
+            .pps_deblocking_filter_override_enabled_flag = pps->pps_deblocking_filter_override_enabled_flag,
+            .pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag,
+            .pps_dbf_info_in_ph_flag = pps->pps_dbf_info_in_ph_flag,
+            .pps_sao_info_in_ph_flag = pps->pps_sao_info_in_ph_flag,
+            .pps_alf_info_in_ph_flag = pps->pps_alf_info_in_ph_flag,
+        },
+        .ph_lmcs_aps_id = ph->ph_lmcs_aps_id,
+        .ph_scaling_list_aps_id = ph->ph_scaling_list_aps_id,
+        .ph_log2_diff_min_qt_min_cb_intra_slice_luma = ph->ph_log2_diff_min_qt_min_cb_intra_slice_luma,
+        .ph_max_mtt_hierarchy_depth_intra_slice_luma = ph->ph_max_mtt_hierarchy_depth_intra_slice_luma,
+        .ph_log2_diff_max_bt_min_qt_intra_slice_luma = ph->ph_log2_diff_max_bt_min_qt_intra_slice_luma,
+        .ph_log2_diff_max_tt_min_qt_intra_slice_luma = ph->ph_log2_diff_max_tt_min_qt_intra_slice_luma,
+        .ph_log2_diff_min_qt_min_cb_intra_slice_chroma = ph->ph_log2_diff_min_qt_min_cb_intra_slice_chroma,
+        .ph_max_mtt_hierarchy_depth_intra_slice_chroma = ph->ph_max_mtt_hierarchy_depth_intra_slice_chroma,
+        .ph_log2_diff_max_bt_min_qt_intra_slice_chroma = ph->ph_log2_diff_max_bt_min_qt_intra_slice_chroma,
+        .ph_log2_diff_max_tt_min_qt_intra_slice_chroma = ph->ph_log2_diff_max_tt_min_qt_intra_slice_chroma,
+        .ph_cu_qp_delta_subdiv_intra_slice = ph->ph_cu_qp_delta_subdiv_intra_slice,
+        .ph_cu_chroma_qp_offset_subdiv_intra_slice = ph->ph_cu_chroma_qp_offset_subdiv_intra_slice,
+        .ph_log2_diff_min_qt_min_cb_inter_slice = ph->ph_log2_diff_min_qt_min_cb_inter_slice,
+        .ph_max_mtt_hierarchy_depth_inter_slice = ph->ph_max_mtt_hierarchy_depth_inter_slice,
+        .ph_log2_diff_max_bt_min_qt_inter_slice = ph->ph_log2_diff_max_bt_min_qt_inter_slice,
+        .ph_log2_diff_max_tt_min_qt_inter_slice = ph->ph_log2_diff_max_tt_min_qt_inter_slice,
+        .ph_cu_qp_delta_subdiv_inter_slice = ph->ph_cu_qp_delta_subdiv_inter_slice,
+        .ph_cu_chroma_qp_offset_subdiv_inter_slice = ph->ph_cu_chroma_qp_offset_subdiv_inter_slice,
+        .ph_flags.bits = {
+            .ph_non_ref_pic_flag = ph->ph_non_ref_pic_flag,
+            .ph_alf_enabled_flag = ph->ph_alf_enabled_flag,
+            .ph_alf_cb_enabled_flag = ph->ph_alf_cb_enabled_flag,
+            .ph_alf_cr_enabled_flag = ph->ph_alf_cr_enabled_flag,
+            .ph_alf_cc_cb_enabled_flag = ph->ph_alf_cc_cb_enabled_flag,
+            .ph_alf_cc_cr_enabled_flag = ph->ph_alf_cc_cr_enabled_flag,
+            .ph_lmcs_enabled_flag = ph->ph_lmcs_enabled_flag,
+            .ph_chroma_residual_scale_flag = ph->ph_chroma_residual_scale_flag,
+            .ph_explicit_scaling_list_enabled_flag = ph->ph_explicit_scaling_list_enabled_flag,
+            .ph_virtual_boundaries_present_flag = ph->ph_virtual_boundaries_present_flag,
+            .ph_temporal_mvp_enabled_flag = ph->ph_temporal_mvp_enabled_flag,
+            .ph_mmvd_fullpel_only_flag = ph->ph_mmvd_fullpel_only_flag,
+            .ph_mvd_l1_zero_flag = ph->ph_mvd_l1_zero_flag,
+            .ph_bdof_disabled_flag = ph->ph_bdof_disabled_flag,
+            .ph_dmvr_disabled_flag = ph->ph_dmvr_disabled_flag,
+            .ph_prof_disabled_flag = ph->ph_prof_disabled_flag,
+            .ph_joint_cbcr_sign_flag = ph->ph_joint_cbcr_sign_flag,
+            .ph_sao_luma_enabled_flag = ph->ph_sao_luma_enabled_flag,
+            .ph_sao_chroma_enabled_flag = ph->ph_sao_chroma_enabled_flag,
+            .ph_deblocking_filter_disabled_flag = ph->ph_deblocking_filter_disabled_flag,
+        },
+        .PicMiscFlags.fields = {
+            .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag ? 0 : IS_IRAP(h) ? 1 : 0,
+        }
+    };
+
+    fill_vaapi_pic(&pic_param->CurrPic, fc->ref);
+    fill_vaapi_reference_frames(fc, pic_param);
+
+    for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++)
+        for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++)
+            pic_param->ChromaQpTable[i][j] = fc->ps.sps->chroma_qp_table[i][j];
+    for (i = 0; i < 4; i++) {
+        pic_param->sps_ladf_qp_offset[i] = sps->sps_ladf_qp_offset[i];
+        pic_param->sps_ladf_delta_threshold_minus1[i] = sps->sps_ladf_delta_threshold_minus1[i];
+    }
+
+    /* Positions are coded minus one; the stored value is (coded + 1) * 8. */
+    for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? sps->sps_num_ver_virtual_boundaries : ph->ph_num_ver_virtual_boundaries); i++) {
+        pic_param->VirtualBoundaryPosX[i] = (sps->sps_virtual_boundaries_present_flag ?
+                                             (sps->sps_virtual_boundary_pos_x_minus1[ i ] + 1) :
+                                             (ph->ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8;
+    }
+
+    for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? sps->sps_num_hor_virtual_boundaries : ph->ph_num_hor_virtual_boundaries); i++) {
+        pic_param->VirtualBoundaryPosY[i] = (sps->sps_virtual_boundaries_present_flag ?
+                                             (sps->sps_virtual_boundary_pos_y_minus1[ i ] + 1) :
+                                             (ph->ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8;
+    }
+
+    for (i = 0; i < 6; i++) {
+        pic_param->pps_cb_qp_offset_list[i] = pps->pps_cb_qp_offset_list[i];
+        pic_param->pps_cr_qp_offset_list[i] = pps->pps_cr_qp_offset_list[i];
+        pic_param->pps_joint_cbcr_qp_offset_list[i] = pps->pps_joint_cbcr_qp_offset_list[i];
+    }
+
+    err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                            VAPictureParameterBufferType,
+                                            &pic->pic_param, sizeof(VAPictureParameterBufferVVC));
+    if (err < 0)
+        goto fail;
+
+    /* One buffer per subpicture, only when the SPS carries subpicture info. */
+    for (i = 0; i <= sps->sps_num_subpics_minus1 && sps->sps_subpic_info_present_flag; i++) {
+        VASubPicVVC subpic_param = {
+            .sps_subpic_ctu_top_left_x = sps->sps_subpic_ctu_top_left_x[i],
+            .sps_subpic_ctu_top_left_y = sps->sps_subpic_ctu_top_left_y[i],
+            .sps_subpic_width_minus1 = sps->sps_subpic_width_minus1[i],
+            .sps_subpic_height_minus1 = sps->sps_subpic_height_minus1[i],
+            .SubpicIdVal = pps->sub_pic_id_val[i],
+            .subpic_flags.bits = {
+                .sps_subpic_treated_as_pic_flag = sps->sps_subpic_treated_as_pic_flag[i],
+                .sps_loop_filter_across_subpic_enabled_flag = sps->sps_loop_filter_across_subpic_enabled_flag[i],
+            }
+        };
+        err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                VASubPicBufferType,
+                                                &subpic_param, sizeof(VASubPicVVC));
+        if (err < 0)
+            goto fail;
+    }
+
+    /* One buffer per ALF entry currently present in the decoder context. */
+    for (i = 0; i < VVC_MAX_ALF_COUNT; i++) {
+        const VVCALF *alf_list = h->ps.alf_list[i];
+        if (alf_list) {
+            const H266RawAPS *alf = alf_list->r;
+            VAAlfDataVVC alf_param = {
+                .aps_adaptation_parameter_set_id = i,
+                .alf_luma_num_filters_signalled_minus1 = alf->alf_luma_num_filters_signalled_minus1,
+                .alf_chroma_num_alt_filters_minus1 = alf->alf_chroma_num_alt_filters_minus1,
+                .alf_cc_cb_filters_signalled_minus1 = alf->alf_cc_cb_filters_signalled_minus1,
+                .alf_cc_cr_filters_signalled_minus1 = alf->alf_cc_cr_filters_signalled_minus1,
+                .alf_flags.bits = {
+                    .alf_luma_filter_signal_flag = alf->alf_luma_filter_signal_flag,
+                    .alf_chroma_filter_signal_flag = alf->alf_chroma_filter_signal_flag,
+                    .alf_cc_cb_filter_signal_flag = alf->alf_cc_cb_filter_signal_flag,
+                    .alf_cc_cr_filter_signal_flag = alf->alf_cc_cr_filter_signal_flag,
+                    .alf_luma_clip_flag = alf->alf_luma_clip_flag,
+                    .alf_chroma_clip_flag = alf->alf_chroma_clip_flag,
+                }
+            };
+
+            for (j = 0; j < 25; j++)
+                alf_param.alf_luma_coeff_delta_idx[j] = alf->alf_luma_coeff_delta_idx[j];
+
+            /* Signed coefficient = abs * (1 - 2 * sign). */
+            for (j = 0; j < 25; j++) {
+                for (k = 0; k < 12; k++) {
+                    alf_param.filtCoeff[j][k] = alf->alf_luma_coeff_abs[j][k] * (1 - 2 * alf->alf_luma_coeff_sign[j][k]);
+                    alf_param.alf_luma_clip_idx[j][k] = alf->alf_luma_clip_idx[j][k];
+                }
+            }
+
+            for (j = 0; j < 8; j++) {
+                for (k = 0; k < 6; k++) {
+                    alf_param.AlfCoeffC[j][k] = alf->alf_chroma_coeff_abs[j][k] * (1 - 2 * alf->alf_chroma_coeff_sign[j][k]);
+                    alf_param.alf_chroma_clip_idx[j][k] = alf->alf_chroma_clip_idx[j][k];
+                }
+            }
+
+            /* A mapped value of 0 leaves the coefficient at its zero
+             * initializer; otherwise it is +/- 2^(mapped - 1). */
+            for (j = 0; j < 4; j++) {
+                for (k = 0; k < 7; k++) {
+                    if (alf->alf_cc_cb_mapped_coeff_abs[j][k])
+                        alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 * alf->alf_cc_cb_coeff_sign[j][k]) * (1 << (alf->alf_cc_cb_mapped_coeff_abs[j][k] - 1));
+                    if (alf->alf_cc_cr_mapped_coeff_abs[j][k])
+                        alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 * alf->alf_cc_cr_coeff_sign[j][k]) * (1 << (alf->alf_cc_cr_mapped_coeff_abs[j][k] - 1));
+                }
+            }
+
+            err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                    VAAlfBufferType,
+                                                    &alf_param, sizeof(VAAlfDataVVC));
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    /* One buffer per LMCS entry currently present in the decoder context. */
+    for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) {
+        const H266RawAPS *lmcs = h->ps.lmcs_list[i];
+        if (lmcs) {
+            VALmcsDataVVC lmcs_param = {
+                .aps_adaptation_parameter_set_id = i,
+                .lmcs_min_bin_idx = lmcs->lmcs_min_bin_idx,
+                .lmcs_delta_max_bin_idx = lmcs->lmcs_delta_max_bin_idx,
+                .lmcsDeltaCrs = (1 - 2 * lmcs->lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs,
+            };
+
+            for (j = lmcs->lmcs_min_bin_idx; j <= 15 - lmcs->lmcs_delta_max_bin_idx; j++)
+                lmcs_param.lmcsDeltaCW[j] = (1 - 2 * lmcs->lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j];
+
+            err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                    VALmcsBufferType,
+                                                    &lmcs_param, sizeof(VALmcsDataVVC));
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    /* One buffer per scaling list currently present in the decoder context. */
+    for (i = 0; i < VVC_MAX_SL_COUNT; i++) {
+        const VVCScalingList *sl = h->ps.scaling_list[i];
+        if (sl) {
+            int l;
+
+            VAScalingListVVC sl_param = {
+                .aps_adaptation_parameter_set_id = i,
+            };
+
+            for (j = 0; j < 14; j++)
+                sl_param.ScalingMatrixDCRec[j] = sl->scaling_matrix_dc_rec[j];
+
+            /* The flat source matrices are read at [l * N + k] and written
+             * to [k][l]. */
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 2; k++)
+                    for (l = 0; l < 2; l++)
+                        sl_param.ScalingMatrixRec2x2[j][k][l] = sl->scaling_matrix_rec[j][l * 2 + k];
+
+            for (j = 2; j < 8; j++)
+                for (k = 0; k < 4; k++)
+                    for (l = 0; l < 4; l++)
+                        sl_param.ScalingMatrixRec4x4[j - 2][k][l] = sl->scaling_matrix_rec[j][l * 4 + k];
+
+            for (j = 8; j < 28; j++)
+                for (k = 0; k < 8; k++)
+                    for (l = 0; l < 8; l++)
+                        sl_param.ScalingMatrixRec8x8[j - 8][k][l] = sl->scaling_matrix_rec[j][l * 8 + k];
+
+            err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                    VAIQMatrixBufferType,
+                                                    &sl_param, sizeof(VAScalingListVVC));
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    /* Explicit tile column widths first, then explicit tile row heights. */
+    for (i = 0; i <= pps->pps_num_exp_tile_columns_minus1; i++) {
+        tile_dim = pps->pps_tile_column_width_minus1[i];
+        err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                VATileBufferType,
+                                                &tile_dim, sizeof(tile_dim));
+        if (err < 0)
+            goto fail;
+    }
+
+    for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) {
+        tile_dim = pps->pps_tile_row_height_minus1[i];
+        err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                VATileBufferType,
+                                                &tile_dim, sizeof(tile_dim));
+        if (err < 0)
+            goto fail;
+    }
+
+    if (!pps->pps_no_pic_partition_flag && pps->pps_rect_slice_flag && !pps->pps_single_slice_per_subpic_flag) {
+        for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
+            for (j = 0; j < pps->pps_num_exp_slices_in_tile[i]; j++) {
+                exp_slice_height_in_ctus[i + j] = pps->pps_exp_slice_height_in_ctus_minus1[i][j] + 1;
+            }
+        }
+        for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
+            VASliceStructVVC ss_param = {
+                .SliceTopLeftTileIdx = pps->slice_top_left_tile_idx[i],
+                .pps_slice_width_in_tiles_minus1 = pps->pps_slice_width_in_tiles_minus1[i],
+                .pps_slice_height_in_tiles_minus1 = pps->pps_slice_height_in_tiles_minus1[i],
+            };
+
+            if (pps->pps_slice_width_in_tiles_minus1[i] > 0 || pps->pps_slice_height_in_tiles_minus1[i] > 0)
+                ss_param.pps_exp_slice_height_in_ctus_minus1 = 0;
+            else {
+                if (pps->num_slices_in_tile[i] == 1)
+                    ss_param.pps_exp_slice_height_in_ctus_minus1 = pps->row_height_val[pps->slice_top_left_tile_idx[i] / pps->num_tile_columns] - 1;
+                else if (exp_slice_height_in_ctus[i])
+                    ss_param.pps_exp_slice_height_in_ctus_minus1 = exp_slice_height_in_ctus[i] - 1;
+                else
+                    continue; /* no explicit height recorded: no buffer for this slice */
+            }
+
+            err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
+                                                    VASliceStructBufferType,
+                                                    &ss_param, sizeof(VASliceStructVVC));
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, &pic->pic);
+    return err;
+}
+
+/**
+ * Look up a reference frame in the ReferenceFrames array of the current
+ * picture's parameter buffer.
+ *
+ * An entry matches when it has a valid surface id equal to the surface of
+ * the frame and the same picture order count.
+ *
+ * @return index of the matching entry, or 0xFF when frame is NULL or no
+ *         entry matches
+ */
+static uint8_t get_ref_pic_index(const VVCContext *h, const VVCFrame *frame)
+{
+    VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs];
+    VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
+    const VAPictureVVC *refs = pic->pic_param.ReferenceFrames;
+
+    if (frame) {
+        for (uint8_t idx = 0; idx < FF_ARRAY_ELEMS(pic->pic_param.ReferenceFrames); idx++) {
+            const VAPictureVVC *cand = &refs[idx];
+
+            if (cand->picture_id == VA_INVALID_ID)
+                continue;
+            if (cand->picture_id != ff_vaapi_get_surface_id(frame->frame))
+                continue;
+            if (cand->pic_order_cnt == frame->poc)
+                return idx;
+        }
+    }
+
+    return 0xFF;
+}
+
+/**
+ * Find the byte offset in buffer at which the slice data begins.
+ *
+ * The first min(32, slice->data_size) bytes of slice->data are matched
+ * against buffer. While matching, a 0x03 byte preceded by two zero bytes in
+ * buffer is skipped, and a candidate start that is itself such a byte is
+ * never considered.
+ *
+ * All reads of buffer are bounded by size; a candidate whose comparison
+ * would run past the end of buffer is rejected instead of being read.
+ *
+ * @param buffer slice bytes as handed to the hwaccel
+ * @param size   number of valid bytes in buffer
+ * @param sc     slice context; sc->ref is read as the H266RawSlice
+ * @return offset of the first match, or 0 when nothing matches (also when
+ *         the slice has no data bytes)
+ */
+static int get_slice_data_offset(const uint8_t *buffer, uint32_t size, const SliceContext* sc)
+{
+    const H266RawSlice *slice = sc->ref;
+    int num_identical_bytes = slice->data_size < 32 ? slice->data_size : 32;
+
+    for (uint32_t i = 0; i < size; i++) {
+        uint32_t skip_bytes = 0;
+        if (i >= 2 && buffer[i] == 0x03 && !buffer[i - 1] && !buffer[i - 2])
+            continue;
+
+        for (int j = 0; j < num_identical_bytes; j++) {
+            uint32_t pos = i + j + skip_bytes;
+
+            /* Never look beyond the bytes the caller handed us. */
+            if (pos >= size)
+                break;
+
+            if (i >= 2 && buffer[pos] == 0x03 && !buffer[pos - 1] && !buffer[pos - 2]) {
+                skip_bytes++;
+                pos++;
+                if (pos >= size)
+                    break;
+            }
+
+            if (buffer[pos] != slice->data[j])
+                break;
+
+            if (j + 1 == num_identical_bytes)
+                return i;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Fill the VA-API slice parameter buffer for one slice and queue it together
+ * with the slice data.
+ *
+ * The whole buffer is submitted (VA_SLICE_DATA_FLAG_ALL, offset 0); the
+ * position of the slice data inside it comes from get_slice_data_offset().
+ * Reference list entries are initialised to 0xFF and then set from
+ * get_ref_pic_index() for the lists the slice type uses.
+ *
+ * @return 0 on success; otherwise the picture is cancelled with
+ *         ff_vaapi_decode_cancel() and the error from
+ *         ff_vaapi_decode_make_slice_buffer() is returned
+ */
+static int vaapi_vvc_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t *buffer,
+                                  uint32_t size)
+{
+    const VVCContext *h = avctx->priv_data;
+    VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs];
+    const SliceContext *sc = fc->slices[fc->nb_slices];
+    const H266RawPPS *pps = fc->ps.pps->r;
+    const H266RawPictureHeader *ph = fc->ps.ph.r;
+    const H266RawSliceHeader *sh = sc->sh.r;
+    VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
+    VASliceParameterBufferVVC *slice_param = &pic->slice_param;
+    /* The QP delta is taken from the picture header or the slice header,
+     * depending on where the PPS says it is signalled. */
+    const int qp_delta = pps->pps_qp_delta_info_in_ph_flag ? ph->ph_qp_delta
+                                                           : sh->sh_qp_delta;
+    int nb_list, err;
+
+    *slice_param = (VASliceParameterBufferVVC) {
+        .slice_data_size = size,
+        .slice_data_offset = 0,
+        .slice_data_flag = VA_SLICE_DATA_FLAG_ALL,
+        .slice_data_byte_offset = get_slice_data_offset(buffer, size, sc),
+        .sh_subpic_id = sh->sh_subpic_id,
+        .sh_slice_address = sh->sh_slice_address,
+        .sh_num_tiles_in_slice_minus1 = sh->sh_num_tiles_in_slice_minus1,
+        .sh_slice_type = sh->sh_slice_type,
+        .sh_num_alf_aps_ids_luma = sh->sh_num_alf_aps_ids_luma,
+        .sh_alf_aps_id_chroma = sh->sh_alf_aps_id_chroma,
+        .sh_alf_cc_cb_aps_id = sh->sh_alf_cc_cb_aps_id,
+        .sh_alf_cc_cr_aps_id = sh->sh_alf_cc_cr_aps_id,
+        .NumRefIdxActive[0] = sh->num_ref_idx_active[0],
+        .NumRefIdxActive[1] = sh->num_ref_idx_active[1],
+        .sh_collocated_ref_idx = sh->sh_collocated_ref_idx,
+        .SliceQpY = 26 + pps->pps_init_qp_minus26 + qp_delta,
+        .sh_cb_qp_offset = sh->sh_cb_qp_offset,
+        .sh_cr_qp_offset = sh->sh_cr_qp_offset,
+        .sh_joint_cbcr_qp_offset = sh->sh_joint_cbcr_qp_offset,
+        .sh_luma_beta_offset_div2 = sh->sh_luma_beta_offset_div2,
+        .sh_luma_tc_offset_div2 = sh->sh_luma_tc_offset_div2,
+        .sh_cb_beta_offset_div2 = sh->sh_cb_beta_offset_div2,
+        .sh_cb_tc_offset_div2 = sh->sh_cb_tc_offset_div2,
+        .sh_cr_beta_offset_div2 = sh->sh_cr_beta_offset_div2,
+        .sh_cr_tc_offset_div2 = sh->sh_cr_tc_offset_div2,
+        .WPInfo = {
+            .luma_log2_weight_denom = sh->sh_pred_weight_table.luma_log2_weight_denom,
+            .delta_chroma_log2_weight_denom = sh->sh_pred_weight_table.delta_chroma_log2_weight_denom,
+            .num_l0_weights = sh->sh_pred_weight_table.num_l0_weights,
+            .num_l1_weights = sh->sh_pred_weight_table.num_l1_weights,
+        },
+        .sh_flags.bits = {
+            .sh_alf_enabled_flag = sh->sh_alf_enabled_flag,
+            .sh_alf_cb_enabled_flag = sh->sh_alf_cb_enabled_flag,
+            .sh_alf_cr_enabled_flag = sh->sh_alf_cr_enabled_flag,
+            .sh_alf_cc_cb_enabled_flag = sh->sh_alf_cc_cb_enabled_flag,
+            .sh_alf_cc_cr_enabled_flag = sh->sh_alf_cc_cr_enabled_flag,
+            .sh_lmcs_used_flag = sh->sh_lmcs_used_flag,
+            .sh_explicit_scaling_list_used_flag = sh->sh_explicit_scaling_list_used_flag,
+            .sh_cabac_init_flag = sh->sh_cabac_init_flag,
+            .sh_collocated_from_l0_flag = sh->sh_collocated_from_l0_flag,
+            .sh_cu_chroma_qp_offset_enabled_flag = sh->sh_cu_chroma_qp_offset_enabled_flag,
+            .sh_sao_luma_used_flag = sh->sh_sao_luma_used_flag,
+            .sh_sao_chroma_used_flag = sh->sh_sao_chroma_used_flag,
+            .sh_deblocking_filter_disabled_flag = sh->sh_deblocking_filter_disabled_flag,
+            .sh_dep_quant_used_flag = sh->sh_dep_quant_used_flag,
+            .sh_sign_data_hiding_used_flag = sh->sh_sign_data_hiding_used_flag,
+            .sh_ts_residual_coding_disabled_flag = sh->sh_ts_residual_coding_disabled_flag,
+        },
+    };
+
+    /* Entries that are not overwritten below keep the 0xFF marker. */
+    memset(&slice_param->RefPicList, 0xFF, sizeof(slice_param->RefPicList));
+
+    /* B slices use both lists, I slices none, every other type only list 0. */
+    switch (sh->sh_slice_type) {
+    case VVC_SLICE_TYPE_B:
+        nb_list = 2;
+        break;
+    case VVC_SLICE_TYPE_I:
+        nb_list = 0;
+        break;
+    default:
+        nb_list = 1;
+        break;
+    }
+
+    for (int list_idx = 0; list_idx < nb_list; list_idx++) {
+        const RefPicList *rpl = sc->rpl + list_idx;
+
+        for (int ref = 0; ref < rpl->nb_refs; ref++)
+            slice_param->RefPicList[list_idx][ref] = get_ref_pic_index(h, rpl->ref[ref]);
+    }
+
+    for (int aps = 0; aps < 7; aps++)
+        slice_param->sh_alf_aps_id_luma[aps] = sh->sh_alf_aps_id_luma[aps];
+
+    /* Copy the per-reference weighted prediction tables. */
+    for (int ref = 0; ref < 15; ref++) {
+        slice_param->WPInfo.luma_weight_l0_flag[ref] = sh->sh_pred_weight_table.luma_weight_l0_flag[ref];
+        slice_param->WPInfo.chroma_weight_l0_flag[ref] = sh->sh_pred_weight_table.chroma_weight_l0_flag[ref];
+        slice_param->WPInfo.delta_luma_weight_l0[ref] = sh->sh_pred_weight_table.delta_luma_weight_l0[ref];
+        slice_param->WPInfo.luma_offset_l0[ref] = sh->sh_pred_weight_table.luma_offset_l0[ref];
+        slice_param->WPInfo.luma_weight_l1_flag[ref] = sh->sh_pred_weight_table.luma_weight_l1_flag[ref];
+        slice_param->WPInfo.chroma_weight_l1_flag[ref] = sh->sh_pred_weight_table.chroma_weight_l1_flag[ref];
+        slice_param->WPInfo.delta_luma_weight_l1[ref] = sh->sh_pred_weight_table.delta_luma_weight_l1[ref];
+        slice_param->WPInfo.luma_offset_l1[ref] = sh->sh_pred_weight_table.luma_offset_l1[ref];
+
+        for (int c = 0; c < 2; c++) {
+            slice_param->WPInfo.delta_chroma_weight_l0[ref][c] = sh->sh_pred_weight_table.delta_chroma_weight_l0[ref][c];
+            slice_param->WPInfo.delta_chroma_offset_l0[ref][c] = sh->sh_pred_weight_table.delta_chroma_offset_l0[ref][c];
+            slice_param->WPInfo.delta_chroma_weight_l1[ref][c] = sh->sh_pred_weight_table.delta_chroma_weight_l1[ref][c];
+            slice_param->WPInfo.delta_chroma_offset_l1[ref][c] = sh->sh_pred_weight_table.delta_chroma_offset_l1[ref][c];
+        }
+    }
+
+    err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic,
+                                            &pic->slice_param,
+                                            sizeof(VASliceParameterBufferVVC),
+                                            buffer, size);
+    if (!err)
+        return 0;
+
+    ff_vaapi_decode_cancel(avctx, &pic->pic);
+    return err;
+}
+
+/**
+ * Issue the decode of the current picture.
+ *
+ * On success the picture is marked as issued, which
+ * fill_vaapi_reference_frames() checks for reference pictures.
+ *
+ * @return 0 on success; otherwise the picture is cancelled with
+ *         ff_vaapi_decode_cancel() and the error from
+ *         ff_vaapi_decode_issue() is returned
+ */
+static int vaapi_vvc_end_frame(AVCodecContext *avctx)
+{
+    const VVCContext *h = avctx->priv_data;
+    VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs];
+    VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
+    int ret = ff_vaapi_decode_issue(avctx, &pic->pic);
+
+    if (ret < 0) {
+        ff_vaapi_decode_cancel(avctx, &pic->pic);
+        return ret;
+    }
+
+    pic->decode_issued = 1;
+    return 0;
+}
+
+/*
+ * Hwaccel descriptor for VVC decoding through VA-API. It wires the three
+ * per-frame callbacks defined in this file to the shared VA-API decode
+ * init/uninit/frame_params helpers.
+ */
+const FFHWAccel ff_vvc_vaapi_hwaccel = {
+ .p.name = "vvc_vaapi",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_VVC,
+ .p.pix_fmt = AV_PIX_FMT_VAAPI,
+ .start_frame = &vaapi_vvc_start_frame,
+ .end_frame = &vaapi_vvc_end_frame,
+ .decode_slice = &vaapi_vvc_decode_slice,
+ /* Per-frame private data is a VAAPIDecodePictureVVC. */
+ .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC),
+ .init = &ff_vaapi_decode_init,
+ .uninit = &ff_vaapi_decode_uninit,
+ .frame_params = &ff_vaapi_common_frame_params,
+ .priv_data_size = sizeof(VAAPIDecodeContext),
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 06631ffa8c..7aa95fc3f1 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
#include "version_major.h"
-#define LIBAVCODEC_VERSION_MINOR 4
+#define LIBAVCODEC_VERSION_MINOR 5
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
index f2e269ce76..b204a0c73a 100644
--- a/libavcodec/vvc/vvcdec.c
+++ b/libavcodec/vvc/vvcdec.c
@@ -29,6 +29,7 @@
#include "libavutil/cpu.h"
#include "libavutil/thread.h"
+#include "config_components.h"
#include "vvcdec.h"
#include "vvc_ctu.h"
#include "vvc_data.h"
@@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc,
static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps)
{
-#define HWACCEL_MAX 0
+#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
switch (sps->pix_fmt) {
case AV_PIX_FMT_YUV420P:
+#if CONFIG_VVC_VAAPI_HWACCEL
+ *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
break;
case AV_PIX_FMT_YUV420P10:
+#if CONFIG_VVC_VAAPI_HWACCEL
+ *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
break;
}
@@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = {
.caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING | FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_AUTO_THREADS,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles),
+ .hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_VVC_VAAPI_HWACCEL
+ HWACCEL_VAAPI(vvc),
+#endif
+ NULL
+ },
};
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com
@ 2024-03-28 2:04 ` Andreas Rheinhardt
2024-04-02 6:24 ` Wang, Fei W
0 siblings, 1 reply; 14+ messages in thread
From: Andreas Rheinhardt @ 2024-03-28 2:04 UTC (permalink / raw)
To: ffmpeg-devel
fei.w.wang-at-intel.com@ffmpeg.org:
> From: Fei Wang <fei.w.wang@intel.com>
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
> libavcodec/vvc/vvc_refs.c | 6 ++++
> libavcodec/vvc/vvcdec.c | 67 +++++++++++++++++++++++++++++++++++----
> libavcodec/vvc/vvcdec.h | 5 +++
> 3 files changed, 72 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c
> index bf70777550..c9f89a5a0a 100644
> --- a/libavcodec/vvc/vvc_refs.c
> +++ b/libavcodec/vvc/vvc_refs.c
> @@ -25,6 +25,7 @@
> #include "libavutil/thread.h"
> #include "libavcodec/refstruct.h"
> #include "libavcodec/thread.h"
> +#include "libavcodec/decode.h"
>
> #include "vvc_refs.h"
>
> @@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags)
> ff_refstruct_unref(&frame->rpl_tab);
>
> frame->collocated_ref = NULL;
> + ff_refstruct_unref(&frame->hwaccel_picture_private);
> }
> }
>
> @@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc)
> if (!frame->progress)
> goto fail;
>
> + ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame->hwaccel_picture_private);
> + if (ret < 0)
> + goto fail;
> +
> return frame;
> fail:
> ff_vvc_unref_frame(fc, frame, ~0);
> diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
> index d5704aca25..f2e269ce76 100644
> --- a/libavcodec/vvc/vvcdec.c
> +++ b/libavcodec/vvc/vvcdec.c
> @@ -24,6 +24,8 @@
> #include "libavcodec/decode.h"
> #include "libavcodec/profiles.h"
> #include "libavcodec/refstruct.h"
> +#include "libavcodec/hwconfig.h"
> +#include "libavcodec/hwaccel_internal.h"
> #include "libavutil/cpu.h"
> #include "libavutil/thread.h"
>
> @@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src)
>
> ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab);
> ff_refstruct_replace(&dst->rpl, src->rpl);
> + ff_refstruct_replace(&dst->hwaccel_picture_private,
> + src->hwaccel_picture_private);
> dst->nb_rpl_elems = src->nb_rpl_elems;
>
> dst->poc = src->poc;
> @@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc,
> return 0;
> }
>
> +static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps)
> +{
> +#define HWACCEL_MAX 0
> +
> + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
> +
> + switch (sps->pix_fmt) {
> + case AV_PIX_FMT_YUV420P:
> + break;
> + case AV_PIX_FMT_YUV420P10:
> + break;
> + }
> +
> + *fmt++ = sps->pix_fmt;
> + *fmt = AV_PIX_FMT_NONE;
> +
> + return ff_get_format(avctx, pix_fmts);
> +}
> +
> static void export_frame_params(VVCContext *s, const VVCFrameContext *fc)
> {
> AVCodecContext *c = s->avctx;
> const VVCSPS *sps = fc->ps.sps;
> const VVCPPS *pps = fc->ps.pps;
>
> - c->pix_fmt = sps->pix_fmt;
> - c->coded_width = pps->width;
> - c->coded_height = pps->height;
> - c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]);
> - c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]);
> + // Reset HW config if pix_fmt/w/h change.
> + if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width || c->coded_height != pps->height) {
> + c->coded_width = pps->width;
> + c->coded_height = pps->height;
> + c->pix_fmt = get_format(c, sps);
> + s->pix_fmt = sps->pix_fmt;
> + }
> +
> + c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]);
> + c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]);
> }
>
> static int frame_setup(VVCFrameContext *fc, VVCContext *s)
> @@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s, VVCFrameContext *fc, const H2645NAL *nal,
> ret = slice_init_entry_points(sc, fc, nal, unit);
> if (ret < 0)
> return ret;
> +
> + if (s->avctx->hwaccel) {
> + if (is_first_slice) {
> + ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0);
> + if (ret < 0)
> + return ret;
> + }
> +
> + ret = FF_HW_CALL(s->avctx, decode_slice,
> + nal->raw_data, nal->raw_size);
> + if (ret < 0)
> + return ret;
> + }
> +
> fc->nb_slices++;
>
> return 0;
> @@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output)
> static int submit_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *output, int *got_output)
> {
> int ret;
> +
> + if (s->avctx->hwaccel) {
> + if (ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame) < 0) {
> + av_log(s->avctx, AV_LOG_ERROR,
> + "Hardware accelerator failed to decode picture\n");
> + ff_vvc_unref_frame(fc, fc->ref, ~0);
> + return ret;
> + }
> + } else
> + ff_vvc_frame_submit(s, fc);
> +
> s->nb_frames++;
> s->nb_delayed++;
> - ff_vvc_frame_submit(s, fc);
> +
> if (s->nb_delayed >= s->nb_fcs) {
> if ((ret = wait_delayed_frame(s, output, got_output)) < 0)
> return ret;
> @@ -1027,6 +1080,8 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx)
> GDR_SET_RECOVERED(s);
> ff_thread_once(&init_static_once, init_default_scale_m);
>
> + s->pix_fmt = AV_PIX_FMT_NONE;
> +
> return 0;
> }
>
> diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h
> index aa3d715524..009d57424e 100644
> --- a/libavcodec/vvc/vvcdec.h
> +++ b/libavcodec/vvc/vvcdec.h
> @@ -78,6 +78,9 @@ typedef struct VVCFrame {
> * A combination of VVC_FRAME_FLAG_*
> */
> uint8_t flags;
> +
> + AVBufferRef *hwaccel_priv_buf;
Seems unused (hwaccel_picture_private uses RefStruct nowadays; no other
decoder uses the AVBuffer API for this any more).
> + void *hwaccel_picture_private; ///< hardware accelerator private data
> } VVCFrame;
>
> typedef struct SliceContext {
> @@ -224,6 +227,8 @@ typedef struct VVCContext {
>
> uint64_t nb_frames; ///< processed frames
> int nb_delayed; ///< delayed frames
> +
> + enum AVPixelFormat pix_fmt; ///< pix format of current frame
> } VVCContext ;
>
> #endif /* AVCODEC_VVC_VVCDEC_H */
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
` (5 preceding siblings ...)
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com
@ 2024-04-01 19:52 ` Mark Thompson
2024-04-02 6:16 ` Wang, Fei W
6 siblings, 1 reply; 14+ messages in thread
From: Mark Thompson @ 2024-04-01 19:52 UTC (permalink / raw)
To: ffmpeg-devel
On 28/03/2024 01:26, fei.w.wang-at-intel.com@ffmpeg.org wrote:
> From: Fei Wang <fei.w.wang@intel.com>
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
> libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++-------
> libavcodec/vaapi_decode.h | 7 ++-----
> 2 files changed, 24 insertions(+), 12 deletions(-)
This is because the VVC code is going to want to make a lot more of these param buffers - can we just set a slightly larger fixed limit?
If you always need 20 buffers (say), then this has turned 1 allocation per picture into 3 and used more memory in the non-VVC case as well because of the overhead of that (but if you might variably need up to 200 then this is completely fair).
> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> index cca94b5336..1b1972a2a9 100644
> --- a/libavcodec/vaapi_decode.c
> +++ b/libavcodec/vaapi_decode.c
> @@ -38,12 +38,23 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
> {
> VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
> VAStatus vas;
> - VABufferID buffer;
>
> - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
> + av_assert0(pic->nb_param_buffers <= pic->param_allocated);
> + if (pic->nb_param_buffers == pic->param_allocated) {
> + pic->param_buffers =
> + av_realloc_array(pic->param_buffers,
> + pic->param_allocated + 16,
> + sizeof(*pic->param_buffers));
Use av_reallocp_array() to avoid leaking the pointer on failure.
> + if (!pic->param_buffers)
> + return AVERROR(ENOMEM);
> +
> + pic->param_allocated += 16;
> + }
> + av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated);
>
> vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
> - type, size, 1, (void*)data, &buffer);
> + type, size, 1, (void*)data,
> + &pic->param_buffers[pic->nb_param_buffers]);
> if (vas != VA_STATUS_SUCCESS) {
> av_log(avctx, AV_LOG_ERROR, "Failed to create parameter "
> "buffer (type %d): %d (%s).\n",
> @@ -51,14 +62,14 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
> return AVERROR(EIO);
> }
>
> - pic->param_buffers[pic->nb_param_buffers++] = buffer;
> -
> av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes) "
> - "is %#x.\n", type, size, buffer);
> + "is %#x.\n", type, size, pic->param_buffers[pic->nb_param_buffers]);
> +
> + ++pic->nb_param_buffers;
> +
> return 0;
> }
>
> -
> int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx,
> VAAPIDecodePicture *pic,
> const void *params_data,
> @@ -215,6 +226,8 @@ fail:
> fail_at_end:
> exit:
> pic->nb_param_buffers = 0;
> + pic->param_allocated = 0;
> + av_freep(&pic->param_buffers);
> pic->nb_slices = 0;
> pic->slices_allocated = 0;
> av_freep(&pic->slice_buffers);
> @@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext *avctx,
> ff_vaapi_decode_destroy_buffers(avctx, pic);
>
> pic->nb_param_buffers = 0;
> + pic->param_allocated = 0;
> + av_freep(&pic->param_buffers);
> pic->nb_slices = 0;
> pic->slices_allocated = 0;
> av_freep(&pic->slice_buffers);
> diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h
> index 6beda14e52..a41d7ff2ff 100644
> --- a/libavcodec/vaapi_decode.h
> +++ b/libavcodec/vaapi_decode.h
> @@ -32,15 +32,12 @@ static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic)
> return (uintptr_t)pic->data[3];
> }
>
> -enum {
> - MAX_PARAM_BUFFERS = 16,
> -};
> -
> typedef struct VAAPIDecodePicture {
> VASurfaceID output_surface;
>
> int nb_param_buffers;
> - VABufferID param_buffers[MAX_PARAM_BUFFERS];
> + VABufferID *param_buffers;
Previously the array was zeroed at allocation but now it isn't. Can you confirm that that isn't a problem?
> + int param_allocated;
Maybe "nb_param_buffers_allocated" would be clearer.
>
> int nb_slices;
> VABufferID *slice_buffers;
Thanks,
- Mark
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically
2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson
@ 2024-04-02 6:16 ` Wang, Fei W
0 siblings, 0 replies; 14+ messages in thread
From: Wang, Fei W @ 2024-04-02 6:16 UTC (permalink / raw)
To: ffmpeg-devel
On Mon, 2024-04-01 at 20:52 +0100, Mark Thompson wrote:
> On 28/03/2024 01:26, fei.w.wang-at-intel.com@ffmpeg.org wrote:
> > From: Fei Wang <fei.w.wang@intel.com>
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> > libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++-------
> > libavcodec/vaapi_decode.h | 7 ++-----
> > 2 files changed, 24 insertions(+), 12 deletions(-)
>
> This is because the VVC code is going to want to make a lot more of
> these param buffers - can we just set a slightly larger fixed limit?
>
> If you always need 20 buffers (say), then this has turned 1
> allocation per picture into 3 and used more memory in the non-VVC
> case as well because of the overhead of that (but if you might
> variably need up to 200 then this is completely fair).
VVC supports 1000 slices and 990 tile columns, plus other buffers like
APS/tile_rows... So in theory up to 1990+ buffers will be needed.
For the other non-VVC cases, 16 buffers will be created, which is the same as
before.
>
> > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> > index cca94b5336..1b1972a2a9 100644
> > --- a/libavcodec/vaapi_decode.c
> > +++ b/libavcodec/vaapi_decode.c
> > @@ -38,12 +38,23 @@ int
> > ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
> > {
> > VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
> > VAStatus vas;
> > - VABufferID buffer;
> >
> > - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
> > + av_assert0(pic->nb_param_buffers <= pic->param_allocated);
> > + if (pic->nb_param_buffers == pic->param_allocated) {
> > + pic->param_buffers =
> > + av_realloc_array(pic->param_buffers,
> > + pic->param_allocated + 16,
> > + sizeof(*pic->param_buffers));
>
> Use av_reallocp_array() to avoid leaking the pointer on failure.
>
> > + if (!pic->param_buffers)
> > + return AVERROR(ENOMEM);
> > +
> > + pic->param_allocated += 16;
> > + }
> > + av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated);
> >
> > vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
> > - type, size, 1, (void*)data, &buffer);
> > + type, size, 1, (void*)data,
> > + &pic->param_buffers[pic-
> > >nb_param_buffers]);
> > if (vas != VA_STATUS_SUCCESS) {
> > av_log(avctx, AV_LOG_ERROR, "Failed to create parameter "
> > "buffer (type %d): %d (%s).\n",
> > @@ -51,14 +62,14 @@ int
> > ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx,
> > return AVERROR(EIO);
> > }
> >
> > - pic->param_buffers[pic->nb_param_buffers++] = buffer;
> > -
> > av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu
> > bytes) "
> > - "is %#x.\n", type, size, buffer);
> > + "is %#x.\n", type, size, pic->param_buffers[pic-
> > >nb_param_buffers]);
> > +
> > + ++pic->nb_param_buffers;
> > +
> > return 0;
> > }
> >
> > -
> > int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx,
> > VAAPIDecodePicture *pic,
> > const void *params_data,
> > @@ -215,6 +226,8 @@ fail:
> > fail_at_end:
> > exit:
> > pic->nb_param_buffers = 0;
> > + pic->param_allocated = 0;
> > + av_freep(&pic->param_buffers);
> > pic->nb_slices = 0;
> > pic->slices_allocated = 0;
> > av_freep(&pic->slice_buffers);
> > @@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext
> > *avctx,
> > ff_vaapi_decode_destroy_buffers(avctx, pic);
> >
> > pic->nb_param_buffers = 0;
> > + pic->param_allocated = 0;
> > + av_freep(&pic->param_buffers);
> > pic->nb_slices = 0;
> > pic->slices_allocated = 0;
> > av_freep(&pic->slice_buffers);
> > diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h
> > index 6beda14e52..a41d7ff2ff 100644
> > --- a/libavcodec/vaapi_decode.h
> > +++ b/libavcodec/vaapi_decode.h
> > @@ -32,15 +32,12 @@ static inline VASurfaceID
> > ff_vaapi_get_surface_id(AVFrame *pic)
> > return (uintptr_t)pic->data[3];
> > }
> >
> > -enum {
> > - MAX_PARAM_BUFFERS = 16,
> > -};
> > -
> > typedef struct VAAPIDecodePicture {
> > VASurfaceID output_surface;
> >
> > int nb_param_buffers;
> > - VABufferID param_buffers[MAX_PARAM_BUFFERS];
> > + VABufferID *param_buffers;
>
> Previously the array was zeroed at allocation but now it isn't. Can
> you confirm that that isn't a problem?
It should not be a problem: all buffers are visited and destroyed
through nb_param_buffers, which records whether a buffer is valid.
Thanks
Fei
>
> > + int param_allocated;
>
> Maybe "nb_param_buffers_allocated" would be clearer.
>
> >
> > int nb_slices;
> > VABufferID *slice_buffers;
>
> Thanks,
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API
2024-03-28 2:04 ` Andreas Rheinhardt
@ 2024-04-02 6:24 ` Wang, Fei W
0 siblings, 0 replies; 14+ messages in thread
From: Wang, Fei W @ 2024-04-02 6:24 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Wang, Fei W
On Thu, 2024-03-28 at 03:04 +0100, Andreas Rheinhardt wrote:
> fei.w.wang-at-intel.com@ffmpeg.org:
> > From: Fei Wang <fei.w.wang@intel.com>
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> > libavcodec/vvc/vvc_refs.c | 6 ++++
> > libavcodec/vvc/vvcdec.c | 67
> > +++++++++++++++++++++++++++++++++++----
> > libavcodec/vvc/vvcdec.h | 5 +++
> > 3 files changed, 72 insertions(+), 6 deletions(-)
> >
> > diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c
> > index bf70777550..c9f89a5a0a 100644
> > --- a/libavcodec/vvc/vvc_refs.c
> > +++ b/libavcodec/vvc/vvc_refs.c
> > @@ -25,6 +25,7 @@
> > #include "libavutil/thread.h"
> > #include "libavcodec/refstruct.h"
> > #include "libavcodec/thread.h"
> > +#include "libavcodec/decode.h"
> >
> > #include "vvc_refs.h"
> >
> > @@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc,
> > VVCFrame *frame, int flags)
> > ff_refstruct_unref(&frame->rpl_tab);
> >
> > frame->collocated_ref = NULL;
> > + ff_refstruct_unref(&frame->hwaccel_picture_private);
> > }
> > }
> >
> > @@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s,
> > VVCFrameContext *fc)
> > if (!frame->progress)
> > goto fail;
> >
> > + ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame-
> > >hwaccel_picture_private);
> > + if (ret < 0)
> > + goto fail;
> > +
> > return frame;
> > fail:
> > ff_vvc_unref_frame(fc, frame, ~0);
> > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
> > index d5704aca25..f2e269ce76 100644
> > --- a/libavcodec/vvc/vvcdec.c
> > +++ b/libavcodec/vvc/vvcdec.c
> > @@ -24,6 +24,8 @@
> > #include "libavcodec/decode.h"
> > #include "libavcodec/profiles.h"
> > #include "libavcodec/refstruct.h"
> > +#include "libavcodec/hwconfig.h"
> > +#include "libavcodec/hwaccel_internal.h"
> > #include "libavutil/cpu.h"
> > #include "libavutil/thread.h"
> >
> > @@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const
> > VVCFrame *src)
> >
> > ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab);
> > ff_refstruct_replace(&dst->rpl, src->rpl);
> > + ff_refstruct_replace(&dst->hwaccel_picture_private,
> > + src->hwaccel_picture_private);
> > dst->nb_rpl_elems = src->nb_rpl_elems;
> >
> > dst->poc = src->poc;
> > @@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc,
> > VVCContext *s, VVCFrameContext *fc,
> > return 0;
> > }
> >
> > +static enum AVPixelFormat get_format(AVCodecContext *avctx, const
> > VVCSPS *sps)
> > +{
> > +#define HWACCEL_MAX 0
> > +
> > + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
> > +
> > + switch (sps->pix_fmt) {
> > + case AV_PIX_FMT_YUV420P:
> > + break;
> > + case AV_PIX_FMT_YUV420P10:
> > + break;
> > + }
> > +
> > + *fmt++ = sps->pix_fmt;
> > + *fmt = AV_PIX_FMT_NONE;
> > +
> > + return ff_get_format(avctx, pix_fmts);
> > +}
> > +
> > static void export_frame_params(VVCContext *s, const
> > VVCFrameContext *fc)
> > {
> > AVCodecContext *c = s->avctx;
> > const VVCSPS *sps = fc->ps.sps;
> > const VVCPPS *pps = fc->ps.pps;
> >
> > - c->pix_fmt = sps->pix_fmt;
> > - c->coded_width = pps->width;
> > - c->coded_height = pps->height;
> > - c->width = pps->width - ((pps->r-
> > >pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) <<
> > sps->hshift[CHROMA]);
> > - c->height = pps->height - ((pps->r-
> > >pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) <<
> > sps->vshift[CHROMA]);
> > + // Reset HW config if pix_fmt/w/h change.
> > + if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width
> > || c->coded_height != pps->height) {
> > + c->coded_width = pps->width;
> > + c->coded_height = pps->height;
> > + c->pix_fmt = get_format(c, sps);
> > + s->pix_fmt = sps->pix_fmt;
> > + }
> > +
> > + c->width = pps->width - ((pps->r->pps_conf_win_left_offset +
> > pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]);
> > + c->height = pps->height - ((pps->r->pps_conf_win_top_offset +
> > pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]);
> > }
> >
> > static int frame_setup(VVCFrameContext *fc, VVCContext *s)
> > @@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s,
> > VVCFrameContext *fc, const H2645NAL *nal,
> > ret = slice_init_entry_points(sc, fc, nal, unit);
> > if (ret < 0)
> > return ret;
> > +
> > + if (s->avctx->hwaccel) {
> > + if (is_first_slice) {
> > + ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0);
> > + if (ret < 0)
> > + return ret;
> > + }
> > +
> > + ret = FF_HW_CALL(s->avctx, decode_slice,
> > + nal->raw_data, nal->raw_size);
> > + if (ret < 0)
> > + return ret;
> > + }
> > +
> > fc->nb_slices++;
> >
> > return 0;
> > @@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s,
> > AVFrame *output, int *got_output)
> > static int submit_frame(VVCContext *s, VVCFrameContext *fc,
> > AVFrame *output, int *got_output)
> > {
> > int ret;
> > +
> > + if (s->avctx->hwaccel) {
> > + if (ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame) < 0) {
> > + av_log(s->avctx, AV_LOG_ERROR,
> > + "Hardware accelerator failed to decode
> > picture\n");
> > + ff_vvc_unref_frame(fc, fc->ref, ~0);
> > + return ret;
> > + }
> > + } else
> > + ff_vvc_frame_submit(s, fc);
> > +
> > s->nb_frames++;
> > s->nb_delayed++;
> > - ff_vvc_frame_submit(s, fc);
> > +
> > if (s->nb_delayed >= s->nb_fcs) {
> > if ((ret = wait_delayed_frame(s, output, got_output)) < 0)
> > return ret;
> > @@ -1027,6 +1080,8 @@ static av_cold int
> > vvc_decode_init(AVCodecContext *avctx)
> > GDR_SET_RECOVERED(s);
> > ff_thread_once(&init_static_once, init_default_scale_m);
> >
> > + s->pix_fmt = AV_PIX_FMT_NONE;
> > +
> > return 0;
> > }
> >
> > diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h
> > index aa3d715524..009d57424e 100644
> > --- a/libavcodec/vvc/vvcdec.h
> > +++ b/libavcodec/vvc/vvcdec.h
> > @@ -78,6 +78,9 @@ typedef struct VVCFrame {
> > * A combination of VVC_FRAME_FLAG_*
> > */
> > uint8_t flags;
> > +
> > + AVBufferRef *hwaccel_priv_buf;
>
> Seems unused (hwaccel_picture_private uses RefStruct nowadays; no
> other
> decoder uses the AVBuffer API for this any more).
Thanks, will fix in next version.
Fei
>
> > + void *hwaccel_picture_private; ///< hardware accelerator
> > private data
> > } VVCFrame;
> >
> > typedef struct SliceContext {
> > @@ -224,6 +227,8 @@ typedef struct VVCContext {
> >
> > uint64_t nb_frames; ///< processed frames
> > int nb_delayed; ///< delayed frames
> > +
> > + enum AVPixelFormat pix_fmt; ///< pix format of current frame
> > } VVCContext ;
> >
> > #endif /* AVCODEC_VVC_VVCDEC_H */
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com
@ 2024-04-02 12:48 ` Nuo Mi
2024-04-03 3:31 ` Wang, Fei W
0 siblings, 1 reply; 14+ messages in thread
From: Nuo Mi @ 2024-04-02 12:48 UTC (permalink / raw)
To: FFmpeg development discussions and patches; +Cc: fei.w.wang
On Thu, Mar 28, 2024 at 9:27 AM <fei.w.wang-at-intel.com@ffmpeg.org> wrote:
> From: Fei Wang <fei.w.wang@intel.com>
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
> Changelog | 4 +
> configure | 3 +
> libavcodec/Makefile | 1 +
> libavcodec/hwaccels.h | 1 +
> libavcodec/vaapi_decode.c | 7 +
> libavcodec/vaapi_vvc.c | 657 ++++++++++++++++++++++++++++++++++++++
> libavcodec/version.h | 2 +-
> libavcodec/vvc/vvcdec.c | 15 +-
> 8 files changed, 688 insertions(+), 2 deletions(-)
> create mode 100644 libavcodec/vaapi_vvc.c
>
> diff --git a/Changelog b/Changelog
> index e83a00e35c..3108e65558 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -1,6 +1,10 @@
> Entries are sorted chronologically from oldest to youngest within each
> release,
> releases are sorted from youngest to oldest.
>
> +version <next>:
> +- VVC VAAPI decoder
> +
> +
> version 7.0:
> - DXV DXT1 encoder
> - LEAD MCMP decoder
> diff --git a/configure b/configure
> index 2a1d22310b..d902c9adc8 100755
> --- a/configure
> +++ b/configure
> @@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9"
> vp9_vdpau_hwaccel_select="vp9_decoder"
> vp9_videotoolbox_hwaccel_deps="videotoolbox"
> vp9_videotoolbox_hwaccel_select="vp9_decoder"
> +vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC"
> +vvc_vaapi_hwaccel_select="vvc_decoder"
> wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
> wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel"
> wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel"
> @@ -7250,6 +7252,7 @@ if enabled vaapi; then
> check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, 0)"
>
> check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC"
> + check_type "va/va.h va/va_dec_vvc.h" "VAPictureParameterBufferVVC"
> check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth
> check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx
> check_type "va/va.h va/va_vpp.h"
> "VAProcFilterParameterBufferHDRToneMapping"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 9ce6d445c1..343b037636 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) +=
> vaapi_vp9.o
> OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
> OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
> OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
> +OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
>
> # Objects duplicated from other libraries for shared builds
> SHLIBOBJS += log2_tab.o reverse.o
> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> index 5171e4c7d7..88d6b9a9b5 100644
> --- a/libavcodec/hwaccels.h
> +++ b/libavcodec/hwaccels.h
> @@ -82,6 +82,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel;
> extern const struct FFHWAccel ff_vp9_vaapi_hwaccel;
> extern const struct FFHWAccel ff_vp9_vdpau_hwaccel;
> extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel;
> +extern const struct FFHWAccel ff_vvc_vaapi_hwaccel;
> extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel;
> extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel;
> extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel;
> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> index 1b1972a2a9..ceeb1f1a12 100644
> --- a/libavcodec/vaapi_decode.c
> +++ b/libavcodec/vaapi_decode.c
> @@ -455,6 +455,9 @@ static const struct {
> MAP(AV1, AV1_MAIN, AV1Profile0),
> MAP(AV1, AV1_HIGH, AV1Profile1),
> #endif
> +#if VA_CHECK_VERSION(1, 22, 0)
> + MAP(H266, VVC_MAIN_10, VVCMain10),
> +#endif
>
> #undef MAP
> };
> @@ -627,6 +630,10 @@ static int vaapi_decode_make_config(AVCodecContext
> *avctx,
> case AV_CODEC_ID_VP8:
> frames->initial_pool_size += 3;
> break;
> + case AV_CODEC_ID_H266:
> + // Add additional 16 for maximum 16 frames delay in vvc
> native decode.
> + frames->initial_pool_size += 32;
>
One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames amount
to approximately 6GB. Can we dynamically allocate the buffer pool?
The software decoder requires a delay of 16 frames to ensure full
utilization of CPUs. In the future, we may consider increasing this to 32
or even 64 frames.
However, for hardware decoding, given that all processing occurs on the
GPU, we do not require any delay.
> + break;
> default:
> frames->initial_pool_size += 2;
> }
> diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c
> new file mode 100644
> index 0000000000..6141005688
> --- /dev/null
> +++ b/libavcodec/vaapi_vvc.c
> @@ -0,0 +1,657 @@
> +/*
> + * VVC HW decode acceleration through VA API
> + *
> + * Copyright (c) 2024 Intel Corporation
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include <va/va.h>
> +#include <va/va_dec_vvc.h>
> +
> +#include "vvc/vvcdec.h"
> +#include "vvc/vvc_refs.h"
> +#include "hwaccel_internal.h"
> +#include "vaapi_decode.h"
> +
> +typedef struct VAAPIDecodePictureVVC {
> + VAAPIDecodePicture pic;
> + VAPictureParameterBufferVVC pic_param;
> + VASliceParameterBufferVVC slice_param;
> + int decode_issued;
> +} VAAPIDecodePictureVVC;
> +
> +static void init_vaapi_pic(VAPictureVVC *va_pic)
> +{
> + va_pic->picture_id = VA_INVALID_ID;
> + va_pic->flags = VA_PICTURE_VVC_INVALID;
> + va_pic->pic_order_cnt = 0;
> +}
> +
> +static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame *pic)
> +{
> + va_pic->picture_id = ff_vaapi_get_surface_id(pic->frame);
> + va_pic->pic_order_cnt = pic->poc;
> + va_pic->flags = 0;
> +
> + if (pic->flags & VVC_FRAME_FLAG_LONG_REF)
> + va_pic->flags |= VA_PICTURE_VVC_LONG_TERM_REFERENCE;
> +}
> +
> +static void fill_vaapi_reference_frames(const VVCFrameContext *h,
> VAPictureParameterBufferVVC *pp)
> +{
> + const VVCFrame *current_picture = h->ref;
> + int i, j;
> +
> + for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) {
> + const VVCFrame *frame = NULL;
> +
> + while (!frame && j < FF_ARRAY_ELEMS(h->DPB)) {
> + if ((&h->DPB[j] != current_picture ) &&
> + (h->DPB[j].flags & (VVC_FRAME_FLAG_LONG_REF |
> VVC_FRAME_FLAG_SHORT_REF)))
> + frame = &h->DPB[j];
> + j++;
> + }
> +
> + init_vaapi_pic(&pp->ReferenceFrames[i]);
> +
> + if (frame) {
> + VAAPIDecodePictureVVC *pic;
> + fill_vaapi_pic(&pp->ReferenceFrames[i], frame);
> + pic = frame->hwaccel_picture_private;
> + if (!pic->decode_issued)
> + pp->ReferenceFrames[i].flags |=
> VA_PICTURE_VVC_UNAVAILABLE_REFERENCE;
> + }
> + }
> +}
> +
> +static int vaapi_vvc_start_frame(AVCodecContext *avctx,
> + av_unused const uint8_t *buffer,
> + av_unused uint32_t size)
> +{
> + const VVCContext *h = avctx->priv_data;
> + VVCFrameContext *fc = &h->fcs[(h->nb_frames +
> h->nb_fcs) % h->nb_fcs];
> + const H266RawSPS *sps = fc->ps.sps->r;
> + const H266RawPPS *pps = fc->ps.pps->r;
> + const H266RawPictureHeader *ph = fc->ps.ph.r;
> + VAAPIDecodePictureVVC *pic =
> fc->ref->hwaccel_picture_private;
> + VAPictureParameterBufferVVC *pic_param = &pic->pic_param;
> + uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] = {0};
> + int i, j, k, err;
> +
> + pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref->frame);
> +
> + *pic_param = (VAPictureParameterBufferVVC) {
> + .pps_pic_width_in_luma_samples =
> pps->pps_pic_width_in_luma_samples,
> + .pps_pic_height_in_luma_samples =
> pps->pps_pic_height_in_luma_samples,
> + .sps_num_subpics_minus1 =
> sps->sps_num_subpics_minus1,
> + .sps_chroma_format_idc =
> sps->sps_chroma_format_idc,
> + .sps_bitdepth_minus8 =
> sps->sps_bitdepth_minus8,
> + .sps_log2_ctu_size_minus5 =
> sps->sps_log2_ctu_size_minus5,
> + .sps_log2_min_luma_coding_block_size_minus2 =
> sps->sps_log2_min_luma_coding_block_size_minus2,
> + .sps_log2_transform_skip_max_size_minus2 =
> sps->sps_log2_transform_skip_max_size_minus2,
> + .sps_six_minus_max_num_merge_cand =
> sps->sps_six_minus_max_num_merge_cand,
> + .sps_five_minus_max_num_subblock_merge_cand =
> sps->sps_five_minus_max_num_subblock_merge_cand,
> + .sps_max_num_merge_cand_minus_max_num_gpm_cand =
> sps->sps_max_num_merge_cand_minus_max_num_gpm_cand,
> + .sps_log2_parallel_merge_level_minus2 =
> sps->sps_log2_parallel_merge_level_minus2,
> + .sps_min_qp_prime_ts =
> sps->sps_min_qp_prime_ts,
> + .sps_six_minus_max_num_ibc_merge_cand =
> sps->sps_six_minus_max_num_ibc_merge_cand,
> + .sps_num_ladf_intervals_minus2 =
> sps->sps_num_ladf_intervals_minus2,
> + .sps_ladf_lowest_interval_qp_offset =
> sps->sps_ladf_lowest_interval_qp_offset,
> + .sps_flags.bits = {
> + .sps_subpic_info_present_flag
> = sps->sps_subpic_info_present_flag,
> + .sps_independent_subpics_flag
> = sps->sps_independent_subpics_flag,
> + .sps_subpic_same_size_flag
> = sps->sps_subpic_same_size_flag,
> + .sps_entropy_coding_sync_enabled_flag
> = sps->sps_entropy_coding_sync_enabled_flag,
> + .sps_qtbtt_dual_tree_intra_flag
> = sps->sps_qtbtt_dual_tree_intra_flag,
> + .sps_max_luma_transform_size_64_flag
> = sps->sps_max_luma_transform_size_64_flag,
> + .sps_transform_skip_enabled_flag
> = sps->sps_transform_skip_enabled_flag,
> + .sps_bdpcm_enabled_flag
> = sps->sps_bdpcm_enabled_flag,
> + .sps_mts_enabled_flag
> = sps->sps_mts_enabled_flag,
> + .sps_explicit_mts_intra_enabled_flag
> = sps->sps_explicit_mts_intra_enabled_flag,
> + .sps_explicit_mts_inter_enabled_flag
> = sps->sps_explicit_mts_inter_enabled_flag,
> + .sps_lfnst_enabled_flag
> = sps->sps_lfnst_enabled_flag,
> + .sps_joint_cbcr_enabled_flag
> = sps->sps_joint_cbcr_enabled_flag,
> + .sps_same_qp_table_for_chroma_flag
> = sps->sps_same_qp_table_for_chroma_flag,
> + .sps_sao_enabled_flag
> = sps->sps_sao_enabled_flag,
> + .sps_alf_enabled_flag
> = sps->sps_alf_enabled_flag,
> + .sps_ccalf_enabled_flag
> = sps->sps_alf_enabled_flag,
> + .sps_lmcs_enabled_flag
> = sps->sps_lmcs_enabled_flag,
> + .sps_sbtmvp_enabled_flag
> = sps->sps_sbtmvp_enabled_flag,
> + .sps_amvr_enabled_flag
> = sps->sps_amvr_enabled_flag,
> + .sps_smvd_enabled_flag
> = sps->sps_smvd_enabled_flag,
> + .sps_mmvd_enabled_flag
> = sps->sps_mmvd_enabled_flag,
> + .sps_sbt_enabled_flag
> = sps->sps_sbt_enabled_flag,
> + .sps_affine_enabled_flag
> = sps->sps_affine_enabled_flag,
> + .sps_6param_affine_enabled_flag
> = sps->sps_6param_affine_enabled_flag,
> + .sps_affine_amvr_enabled_flag
> = sps->sps_affine_amvr_enabled_flag,
> + .sps_affine_prof_enabled_flag
> = sps->sps_affine_prof_enabled_flag,
> + .sps_bcw_enabled_flag
> = sps->sps_bcw_enabled_flag,
> + .sps_ciip_enabled_flag
> = sps->sps_ciip_enabled_flag,
> + .sps_gpm_enabled_flag
> = sps->sps_gpm_enabled_flag,
> + .sps_isp_enabled_flag
> = sps->sps_isp_enabled_flag,
> + .sps_mrl_enabled_flag
> = sps->sps_mrl_enabled_flag,
> + .sps_mip_enabled_flag
> = sps->sps_mip_enabled_flag,
> + .sps_cclm_enabled_flag
> = sps->sps_cclm_enabled_flag,
> + .sps_chroma_horizontal_collocated_flag
> = sps->sps_chroma_horizontal_collocated_flag,
> + .sps_chroma_vertical_collocated_flag
> = sps->sps_chroma_vertical_collocated_flag,
> + .sps_palette_enabled_flag
> = sps->sps_palette_enabled_flag,
> + .sps_act_enabled_flag
> = sps->sps_act_enabled_flag,
> + .sps_ibc_enabled_flag
> = sps->sps_ibc_enabled_flag,
> + .sps_ladf_enabled_flag
> = sps->sps_ladf_enabled_flag,
> + .sps_explicit_scaling_list_enabled_flag
> = sps->sps_explicit_scaling_list_enabled_flag,
> + .sps_scaling_matrix_for_lfnst_disabled_flag
> = sps->sps_scaling_matrix_for_lfnst_disabled_flag,
> +
> .sps_scaling_matrix_for_alternative_colour_space_disabled_flag =
> sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag,
> + .sps_scaling_matrix_designated_colour_space_flag
> = sps->sps_scaling_matrix_designated_colour_space_flag,
> + .sps_virtual_boundaries_enabled_flag
> = sps->sps_virtual_boundaries_enabled_flag,
> + .sps_virtual_boundaries_present_flag
> = sps->sps_virtual_boundaries_present_flag,
> + },
> + .NumVerVirtualBoundaries =
> sps->sps_virtual_boundaries_present_flag ?
> +
> sps->sps_num_ver_virtual_boundaries :
> +
> ph->ph_num_ver_virtual_boundaries,
> + .NumHorVirtualBoundaries =
> sps->sps_virtual_boundaries_present_flag ?
> +
> sps->sps_num_hor_virtual_boundaries :
> +
> ph->ph_num_hor_virtual_boundaries,
> + .pps_scaling_win_left_offset =
> pps->pps_scaling_win_left_offset,
> + .pps_scaling_win_right_offset =
> pps->pps_scaling_win_right_offset,
> + .pps_scaling_win_top_offset =
> pps->pps_scaling_win_top_offset,
> + .pps_scaling_win_bottom_offset =
> pps->pps_scaling_win_bottom_offset,
> + .pps_num_exp_tile_columns_minus1 =
> pps->pps_num_exp_tile_columns_minus1,
> + .pps_num_exp_tile_rows_minus1 =
> pps->pps_num_exp_tile_rows_minus1,
> + .pps_num_slices_in_pic_minus1 =
> pps->pps_num_slices_in_pic_minus1,
> + .pps_pic_width_minus_wraparound_offset =
> pps->pps_pic_width_minus_wraparound_offset,
> + .pps_cb_qp_offset = pps->pps_cb_qp_offset,
> + .pps_cr_qp_offset = pps->pps_cr_qp_offset,
> + .pps_joint_cbcr_qp_offset_value =
> pps->pps_joint_cbcr_qp_offset_value,
> + .pps_chroma_qp_offset_list_len_minus1 =
> pps->pps_chroma_qp_offset_list_len_minus1,
> + .pps_flags.bits = {
> + .pps_loop_filter_across_tiles_enabled_flag =
> pps->pps_loop_filter_across_tiles_enabled_flag,
> + .pps_rect_slice_flag =
> pps->pps_rect_slice_flag,
> + .pps_single_slice_per_subpic_flag =
> pps->pps_single_slice_per_subpic_flag,
> + .pps_loop_filter_across_slices_enabled_flag =
> pps->pps_loop_filter_across_slices_enabled_flag,
> + .pps_weighted_pred_flag =
> pps->pps_weighted_pred_flag,
> + .pps_weighted_bipred_flag =
> pps->pps_weighted_bipred_flag,
> + .pps_ref_wraparound_enabled_flag =
> pps->pps_ref_wraparound_enabled_flag,
> + .pps_cu_qp_delta_enabled_flag =
> pps->pps_cu_qp_delta_enabled_flag,
> + .pps_cu_chroma_qp_offset_list_enabled_flag =
> pps->pps_cu_chroma_qp_offset_list_enabled_flag,
> + .pps_deblocking_filter_override_enabled_flag =
> pps->pps_deblocking_filter_override_enabled_flag,
> + .pps_deblocking_filter_disabled_flag =
> pps->pps_deblocking_filter_disabled_flag,
> + .pps_dbf_info_in_ph_flag =
> pps->pps_dbf_info_in_ph_flag,
> + .pps_sao_info_in_ph_flag =
> pps->pps_sao_info_in_ph_flag,
> + .pps_alf_info_in_ph_flag =
> pps->pps_alf_info_in_ph_flag,
> + },
> + .ph_lmcs_aps_id =
> ph->ph_lmcs_aps_id,
> + .ph_scaling_list_aps_id =
> ph->ph_scaling_list_aps_id,
> + .ph_log2_diff_min_qt_min_cb_intra_slice_luma =
> ph->ph_log2_diff_min_qt_min_cb_intra_slice_luma,
> + .ph_max_mtt_hierarchy_depth_intra_slice_luma =
> ph->ph_max_mtt_hierarchy_depth_intra_slice_luma,
> + .ph_log2_diff_max_bt_min_qt_intra_slice_luma =
> ph->ph_log2_diff_max_bt_min_qt_intra_slice_luma,
> + .ph_log2_diff_max_tt_min_qt_intra_slice_luma =
> ph->ph_log2_diff_max_tt_min_qt_intra_slice_luma,
> + .ph_log2_diff_min_qt_min_cb_intra_slice_chroma =
> ph->ph_log2_diff_min_qt_min_cb_intra_slice_chroma,
> + .ph_max_mtt_hierarchy_depth_intra_slice_chroma =
> ph->ph_max_mtt_hierarchy_depth_intra_slice_chroma,
> + .ph_log2_diff_max_bt_min_qt_intra_slice_chroma =
> ph->ph_log2_diff_max_bt_min_qt_intra_slice_chroma,
> + .ph_log2_diff_max_tt_min_qt_intra_slice_chroma =
> ph->ph_log2_diff_max_tt_min_qt_intra_slice_chroma,
> + .ph_cu_qp_delta_subdiv_intra_slice =
> ph->ph_cu_qp_delta_subdiv_intra_slice,
> + .ph_cu_chroma_qp_offset_subdiv_intra_slice =
> ph->ph_cu_chroma_qp_offset_subdiv_intra_slice,
> + .ph_log2_diff_min_qt_min_cb_inter_slice =
> ph->ph_log2_diff_min_qt_min_cb_inter_slice,
> + .ph_max_mtt_hierarchy_depth_inter_slice =
> ph->ph_max_mtt_hierarchy_depth_inter_slice,
> + .ph_log2_diff_max_bt_min_qt_inter_slice =
> ph->ph_log2_diff_max_bt_min_qt_inter_slice,
> + .ph_log2_diff_max_tt_min_qt_inter_slice =
> ph->ph_log2_diff_max_tt_min_qt_inter_slice,
> + .ph_cu_qp_delta_subdiv_inter_slice =
> ph->ph_cu_qp_delta_subdiv_inter_slice,
> + .ph_cu_chroma_qp_offset_subdiv_inter_slice =
> ph->ph_cu_chroma_qp_offset_subdiv_inter_slice,
> + .ph_flags.bits= {
> + .ph_non_ref_pic_flag =
> ph->ph_non_ref_pic_flag,
> + .ph_alf_enabled_flag =
> ph->ph_alf_enabled_flag,
> + .ph_alf_cb_enabled_flag =
> ph->ph_alf_cb_enabled_flag,
> + .ph_alf_cr_enabled_flag =
> ph->ph_alf_cr_enabled_flag,
> + .ph_alf_cc_cb_enabled_flag =
> ph->ph_alf_cc_cb_enabled_flag,
> + .ph_alf_cc_cr_enabled_flag =
> ph->ph_alf_cc_cr_enabled_flag,
> + .ph_lmcs_enabled_flag =
> ph->ph_lmcs_enabled_flag,
> + .ph_chroma_residual_scale_flag =
> ph->ph_chroma_residual_scale_flag,
> + .ph_explicit_scaling_list_enabled_flag =
> ph->ph_explicit_scaling_list_enabled_flag,
> + .ph_virtual_boundaries_present_flag =
> ph->ph_virtual_boundaries_present_flag,
> + .ph_temporal_mvp_enabled_flag =
> ph->ph_temporal_mvp_enabled_flag,
> + .ph_mmvd_fullpel_only_flag =
> ph->ph_mmvd_fullpel_only_flag,
> + .ph_mvd_l1_zero_flag =
> ph->ph_mvd_l1_zero_flag,
> + .ph_bdof_disabled_flag =
> ph->ph_bdof_disabled_flag,
> + .ph_dmvr_disabled_flag =
> ph->ph_dmvr_disabled_flag,
> + .ph_prof_disabled_flag =
> ph->ph_prof_disabled_flag,
> + .ph_joint_cbcr_sign_flag =
> ph->ph_joint_cbcr_sign_flag,
> + .ph_sao_luma_enabled_flag =
> ph->ph_sao_luma_enabled_flag,
> + .ph_sao_chroma_enabled_flag =
> ph->ph_sao_chroma_enabled_flag,
> + .ph_deblocking_filter_disabled_flag =
> ph->ph_deblocking_filter_disabled_flag,
> + },
> + .PicMiscFlags.fields = {
> + .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag ? 0 :
> IS_IRAP(h) ? 1 : 0,
> + }
> + };
> +
> + fill_vaapi_pic(&pic_param->CurrPic, fc->ref);
> + fill_vaapi_reference_frames(fc, pic_param);
> +
> + for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++)
> + for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++)
> + pic_param->ChromaQpTable[i][j] =
> fc->ps.sps->chroma_qp_table[i][j];
> + for (i = 0; i < 4; i++) {
> + pic_param->sps_ladf_qp_offset[i] =
> sps->sps_ladf_qp_offset[i];
> + pic_param->sps_ladf_delta_threshold_minus1[i] =
> sps->sps_ladf_delta_threshold_minus1[i];
> + }
> +
> + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ?
> sps->sps_num_ver_virtual_boundaries : ph->ph_num_ver_virtual_boundaries);
> i++) {
> + pic_param->VirtualBoundaryPosX[i] =
> (sps->sps_virtual_boundaries_present_flag ?
> +
> (sps->sps_virtual_boundary_pos_x_minus1[ i ] + 1) :
> +
> (ph->ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8;
> + }
> +
> + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ?
> sps->sps_num_hor_virtual_boundaries : ph->ph_num_hor_virtual_boundaries);
> i++) {
> + pic_param->VirtualBoundaryPosY[i] =
> (sps->sps_virtual_boundaries_present_flag ?
> +
> (sps->sps_virtual_boundary_pos_y_minus1[ i ] + 1) :
> +
> (ph->ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8;
> + }
> +
> + for (i = 0; i < 6; i++) {
> + pic_param->pps_cb_qp_offset_list[i] =
> pps->pps_cb_qp_offset_list[i];
> + pic_param->pps_cr_qp_offset_list[i] =
> pps->pps_cr_qp_offset_list[i];
> + pic_param->pps_joint_cbcr_qp_offset_list[i] =
> pps->pps_joint_cbcr_qp_offset_list[i];
> + }
> +
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VAPictureParameterBufferType,
> + &pic->pic_param,
> sizeof(VAPictureParameterBufferVVC));
> + if (err < 0)
> + goto fail;
> +
> + for (i = 0; i <= sps->sps_num_subpics_minus1 &&
> sps->sps_subpic_info_present_flag; i++) {
> + VASubPicVVC subpic_param = {
> + .sps_subpic_ctu_top_left_x =
> sps->sps_subpic_ctu_top_left_x[i],
> + .sps_subpic_ctu_top_left_y =
> sps->sps_subpic_ctu_top_left_y[i],
> + .sps_subpic_width_minus1 = sps->sps_subpic_width_minus1[i],
> + .sps_subpic_height_minus1 = sps->sps_subpic_height_minus1[i],
> + .SubpicIdVal = pps->sub_pic_id_val[i],
> + .subpic_flags.bits = {
> + .sps_subpic_treated_as_pic_flag =
> sps->sps_subpic_treated_as_pic_flag[i],
> + .sps_loop_filter_across_subpic_enabled_flag =
> sps->sps_loop_filter_across_subpic_enabled_flag[i],
> + }
> + };
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VASubPicBufferType,
> + &subpic_param,
> sizeof(VASubPicVVC));
> + if (err < 0)
> + goto fail;
> + }
> +
> + for (i = 0; i < VVC_MAX_ALF_COUNT; i++) {
> + const VVCALF *alf_list = h->ps.alf_list[i];
> + if (alf_list) {
> + const H266RawAPS *alf = alf_list->r;
> + VAAlfDataVVC alf_param = {
> + .aps_adaptation_parameter_set_id = i,
> + .alf_luma_num_filters_signalled_minus1 =
> alf->alf_luma_num_filters_signalled_minus1,
> + .alf_chroma_num_alt_filters_minus1 =
> alf->alf_chroma_num_alt_filters_minus1,
> + .alf_cc_cb_filters_signalled_minus1 =
> alf->alf_cc_cb_filters_signalled_minus1,
> + .alf_cc_cr_filters_signalled_minus1 =
> alf->alf_cc_cr_filters_signalled_minus1,
> + .alf_flags.bits = {
> + .alf_luma_filter_signal_flag =
> alf->alf_luma_filter_signal_flag,
> + .alf_chroma_filter_signal_flag =
> alf->alf_chroma_filter_signal_flag,
> + .alf_cc_cb_filter_signal_flag =
> alf->alf_cc_cb_filter_signal_flag,
> + .alf_cc_cr_filter_signal_flag =
> alf->alf_cc_cr_filter_signal_flag,
> + .alf_luma_clip_flag =
> alf->alf_luma_clip_flag,
> + .alf_chroma_clip_flag =
> alf->alf_chroma_clip_flag,
> + }
> + };
> +
> + for (j = 0; j < 25; j++)
> + alf_param.alf_luma_coeff_delta_idx[j] =
> alf->alf_luma_coeff_delta_idx[j];
> +
> + for (j = 0; j < 25; j++) {
> + for (k = 0; k < 12; k++) {
> + alf_param.filtCoeff[j][k] =
> alf->alf_luma_coeff_abs[j][k] * (1 - 2 * alf->alf_luma_coeff_sign[j][k]);
> + alf_param.alf_luma_clip_idx[j][k] =
> alf->alf_luma_clip_idx[j][k];
> + }
> + }
> +
> + for (j = 0; j < 8; j++) {
> + for (k = 0; k < 6; k++) {
> + alf_param.AlfCoeffC[j][k] =
> alf->alf_chroma_coeff_abs[j][k] * (1 - 2 *
> alf->alf_chroma_coeff_sign[j][k]);
> + alf_param.alf_chroma_clip_idx[j][k] =
> alf->alf_chroma_clip_idx[j][k];
> + }
> + }
> +
> + for (j = 0; j < 4; j++) {
> + for (k = 0; k < 7; k++) {
> + if (alf->alf_cc_cb_mapped_coeff_abs[j][k])
> + alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 *
> alf->alf_cc_cb_coeff_sign[j][k]) * (1 <<
> (alf->alf_cc_cb_mapped_coeff_abs[j][k] - 1));
> + if (alf->alf_cc_cr_mapped_coeff_abs[j][k])
> + alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 *
> alf->alf_cc_cr_coeff_sign[j][k]) * (1 <<
> (alf->alf_cc_cr_mapped_coeff_abs[j][k] - 1));
> + }
> + }
> +
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VAAlfBufferType,
> + &alf_param,
> sizeof(VAAlfDataVVC));
> + if (err < 0)
> + goto fail;
> + }
> + }
> +
> + for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) {
> + const H266RawAPS *lmcs = h->ps.lmcs_list[i];
> + if (lmcs) {
> + VALmcsDataVVC lmcs_param = {
> + .aps_adaptation_parameter_set_id = i,
> + .lmcs_min_bin_idx = lmcs->lmcs_min_bin_idx,
> + .lmcs_delta_max_bin_idx =
> lmcs->lmcs_delta_max_bin_idx,
> + .lmcsDeltaCrs = (1 - 2 *
> lmcs->lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs,
> + };
> +
> + for (j = lmcs->lmcs_min_bin_idx; j <= 15 -
> lmcs->lmcs_delta_max_bin_idx; j++)
> + lmcs_param.lmcsDeltaCW[j] = (1 - 2 *
> lmcs->lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j];
> +
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VALmcsBufferType,
> + &lmcs_param,
> sizeof(VALmcsDataVVC));
> + if (err < 0)
> + goto fail;
> + }
> + }
> +
> + for (i = 0; i < VVC_MAX_SL_COUNT; i++) {
> + const VVCScalingList *sl = h->ps.scaling_list[i];
> + if (sl) {
> + int l;
> +
> + VAScalingListVVC sl_param = {
> + .aps_adaptation_parameter_set_id = i,
> + };
> +
> + for (j = 0; j < 14; j++)
> + sl_param.ScalingMatrixDCRec[j] =
> sl->scaling_matrix_dc_rec[j];
> +
> + for (j = 0; j < 2; j++)
> + for (k = 0; k < 2; k++)
> + for (l = 0; l < 2; l++)
> + sl_param.ScalingMatrixRec2x2[j][k][l] =
> sl->scaling_matrix_rec[j][l * 2 + k];
> +
> + for (j = 2; j < 8; j++)
> + for (k = 0; k < 4; k++)
> + for (l = 0; l < 4; l++)
> + sl_param.ScalingMatrixRec4x4[j - 2][k][l] =
> sl->scaling_matrix_rec[j][l * 4 + k];
> +
> + for (j = 8; j < 28; j++)
> + for (k = 0; k < 8; k++)
> + for (l = 0; l < 8; l++)
> + sl_param.ScalingMatrixRec8x8[j - 8][k][l] =
> sl->scaling_matrix_rec[j][l * 8 + k];
> +
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VAIQMatrixBufferType,
> + &sl_param,
> sizeof(VAScalingListVVC));
> + if (err < 0)
> + goto fail;
> + }
> + }
> +
> + for (i = 0; i <= pps->pps_num_exp_tile_columns_minus1; i++) {
> + tile_dim = pps->pps_tile_column_width_minus1[i];
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VATileBufferType,
> + &tile_dim,
> sizeof(tile_dim));
> + if (err < 0)
> + goto fail;
> + }
> +
> + for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) {
> + tile_dim = pps->pps_tile_row_height_minus1[i];
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> + VATileBufferType,
> + &tile_dim,
> sizeof(tile_dim));
> + if (err < 0)
> + goto fail;
> + }
> +
> + if (!pps->pps_no_pic_partition_flag && pps->pps_rect_slice_flag &&
> !pps->pps_single_slice_per_subpic_flag) {
> + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
> + for (j = 0; j < pps->pps_num_exp_slices_in_tile[i]; j++) {
> + exp_slice_height_in_ctus[i + j] =
> pps->pps_exp_slice_height_in_ctus_minus1[i][j] + 1;
> + }
> + }
> + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
> + VASliceStructVVC ss_param = {
> + .SliceTopLeftTileIdx =
> pps->slice_top_left_tile_idx[i],
> + .pps_slice_width_in_tiles_minus1 =
> pps->pps_slice_width_in_tiles_minus1[i],
> + .pps_slice_height_in_tiles_minus1 =
> pps->pps_slice_height_in_tiles_minus1[i],
> + };
> +
> + if (pps->pps_slice_width_in_tiles_minus1[i] > 0 ||
> pps->pps_slice_height_in_tiles_minus1[i] > 0)
> + ss_param.pps_exp_slice_height_in_ctus_minus1 = 0;
> + else {
> + if (pps->num_slices_in_tile[i] == 1)
> + ss_param.pps_exp_slice_height_in_ctus_minus1 =
> pps->row_height_val[pps->slice_top_left_tile_idx[i] /
> pps->num_tile_columns] - 1;
> + else if (exp_slice_height_in_ctus[i])
> + ss_param.pps_exp_slice_height_in_ctus_minus1 =
> exp_slice_height_in_ctus[i] - 1;
> + else
> + continue;
> + }
> +
> + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> +
> VASliceStructBufferType,
> + &ss_param,
> sizeof(VASliceStructVVC));
> + if (err < 0)
> + goto fail;
> + }
> + }
> +
> + return 0;
> +
> +fail:
> + ff_vaapi_decode_cancel(avctx, &pic->pic);
> + return err;
> +}
> +
> +static uint8_t get_ref_pic_index(const VVCContext *h, const VVCFrame
> *frame)
> +{
> + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs)
> % h->nb_fcs];
> + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
> + VAPictureParameterBufferVVC *pp = (VAPictureParameterBufferVVC
> *)&pic->pic_param;
> + uint8_t i;
> +
> + if (!frame)
> + return 0xFF;
> +
> + for (i = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) {
> + VASurfaceID pid = pp->ReferenceFrames[i].picture_id;
> + int poc = pp->ReferenceFrames[i].pic_order_cnt;
> + if (pid != VA_INVALID_ID && pid ==
> ff_vaapi_get_surface_id(frame->frame) && poc == frame->poc)
> + return i;
> + }
> +
> + return 0xFF;
> +}
> +
> +static int get_slice_data_offset(const uint8_t *buffer, uint32_t size,
> const SliceContext* sc)
> +{
> + const H266RawSlice *slice = sc->ref;
> + int num_identical_bytes = slice->data_size < 32 ? slice->data_size
> : 32;
> +
> + for (int i = 0; i < size; i++) {
> + int skip_bytes = 0;
> + if (i >=2 && buffer[i] == 0x03 && !buffer[i - 1] && !buffer[i -
> 2])
> + continue;
> +
> + for (int j = 0; j < num_identical_bytes; j++) {
> + if (i >= 2 && buffer[i + j + skip_bytes] == 0x03 && !buffer[i
> + j + skip_bytes - 1] && !buffer[i + j + skip_bytes - 2])
> + skip_bytes++;
> +
> + if (buffer[i + j + skip_bytes] != slice->data[j])
> + break;
> +
> + if (j + 1 == num_identical_bytes)
> + return i;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int vaapi_vvc_decode_slice(AVCodecContext *avctx,
> + const uint8_t *buffer,
> + uint32_t size)
> +{
> + const VVCContext *h = avctx->priv_data;
> + VVCFrameContext *fc = &h->fcs[(h->nb_frames +
> h->nb_fcs) % h->nb_fcs];
> + const SliceContext *sc = fc->slices[fc->nb_slices];
> + const H266RawPPS *pps = fc->ps.pps->r;
> + const H266RawPictureHeader *ph = fc->ps.ph.r;
> + const H266RawSliceHeader *sh = sc->sh.r;
> + VAAPIDecodePictureVVC *pic =
> fc->ref->hwaccel_picture_private;
> + VASliceParameterBufferVVC *slice_param = &pic->slice_param;
> + int nb_list, i, err;
> +
> + *slice_param = (VASliceParameterBufferVVC) {
> + .slice_data_size = size,
> + .slice_data_offset = 0,
> + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL,
> + .slice_data_byte_offset = get_slice_data_offset(buffer,
> size, sc),
> + .sh_subpic_id = sh->sh_subpic_id,
> + .sh_slice_address = sh->sh_slice_address,
> + .sh_num_tiles_in_slice_minus1 =
> sh->sh_num_tiles_in_slice_minus1,
> + .sh_slice_type = sh->sh_slice_type,
> + .sh_num_alf_aps_ids_luma = sh->sh_num_alf_aps_ids_luma,
> + .sh_alf_aps_id_chroma = sh->sh_alf_aps_id_chroma,
> + .sh_alf_cc_cb_aps_id = sh->sh_alf_cc_cb_aps_id,
> + .sh_alf_cc_cr_aps_id = sh->sh_alf_cc_cr_aps_id,
> + .NumRefIdxActive[0] = sh->num_ref_idx_active[0],
> + .NumRefIdxActive[1] = sh->num_ref_idx_active[1],
> + .sh_collocated_ref_idx = sh->sh_collocated_ref_idx,
> + .SliceQpY =
> pps->pps_qp_delta_info_in_ph_flag ?
> + 26 + pps->pps_init_qp_minus26
> + ph->ph_qp_delta :
> + 26 + pps->pps_init_qp_minus26
> + sh->sh_qp_delta,
> + .sh_cb_qp_offset = sh->sh_cb_qp_offset,
> + .sh_cr_qp_offset = sh->sh_cr_qp_offset,
> + .sh_joint_cbcr_qp_offset = sh->sh_joint_cbcr_qp_offset,
> + .sh_luma_beta_offset_div2 = sh->sh_luma_beta_offset_div2,
> + .sh_luma_tc_offset_div2 = sh->sh_luma_tc_offset_div2,
> + .sh_cb_beta_offset_div2 = sh->sh_cb_beta_offset_div2,
> + .sh_cb_tc_offset_div2 = sh->sh_cb_tc_offset_div2,
> + .sh_cr_beta_offset_div2 = sh->sh_cr_beta_offset_div2,
> + .sh_cr_tc_offset_div2 = sh->sh_cr_tc_offset_div2,
> + .WPInfo = {
> + .luma_log2_weight_denom =
> sh->sh_pred_weight_table.luma_log2_weight_denom,
> + .delta_chroma_log2_weight_denom =
> sh->sh_pred_weight_table.delta_chroma_log2_weight_denom,
> + .num_l0_weights =
> sh->sh_pred_weight_table.num_l0_weights,
> + .num_l1_weights =
> sh->sh_pred_weight_table.num_l1_weights,
> + },
> + .sh_flags.bits = {
> + .sh_alf_enabled_flag =
> sh->sh_alf_enabled_flag,
> + .sh_alf_cb_enabled_flag =
> sh->sh_alf_cb_enabled_flag,
> + .sh_alf_cr_enabled_flag =
> sh->sh_alf_cr_enabled_flag,
> + .sh_alf_cc_cb_enabled_flag =
> sh->sh_alf_cc_cb_enabled_flag,
> + .sh_alf_cc_cr_enabled_flag =
> sh->sh_alf_cc_cr_enabled_flag,
> + .sh_lmcs_used_flag = sh->sh_lmcs_used_flag,
> + .sh_explicit_scaling_list_used_flag =
> sh->sh_explicit_scaling_list_used_flag,
> + .sh_cabac_init_flag = sh->sh_cabac_init_flag,
> + .sh_collocated_from_l0_flag =
> sh->sh_collocated_from_l0_flag,
> + .sh_cu_chroma_qp_offset_enabled_flag =
> sh->sh_cu_chroma_qp_offset_enabled_flag,
> + .sh_sao_luma_used_flag =
> sh->sh_sao_luma_used_flag,
> + .sh_sao_chroma_used_flag =
> sh->sh_sao_chroma_used_flag,
> + .sh_deblocking_filter_disabled_flag =
> sh->sh_deblocking_filter_disabled_flag,
> + .sh_dep_quant_used_flag =
> sh->sh_dep_quant_used_flag,
> + .sh_sign_data_hiding_used_flag =
> sh->sh_sign_data_hiding_used_flag,
> + .sh_ts_residual_coding_disabled_flag =
> sh->sh_ts_residual_coding_disabled_flag,
> + },
> + };
> +
> + memset(&slice_param->RefPicList, 0xFF,
> sizeof(slice_param->RefPicList));
> +
> + nb_list = (sh->sh_slice_type == VVC_SLICE_TYPE_B) ?
> + 2 : (sh->sh_slice_type == VVC_SLICE_TYPE_I ? 0 : 1);
> + for (int list_idx = 0; list_idx < nb_list; list_idx++) {
> + RefPicList *rpl = &sc->rpl[list_idx];
> +
> + for (i = 0; i < rpl->nb_refs; i++)
> + slice_param->RefPicList[list_idx][i] = get_ref_pic_index(h,
> rpl->ref[i]);
> + }
> +
> + for (i = 0; i < 7; i++)
> + slice_param->sh_alf_aps_id_luma[i] = sh->sh_alf_aps_id_luma[i];
> +
> + for (i = 0; i < 15; i++) {
> + slice_param->WPInfo.luma_weight_l0_flag[i] =
> sh->sh_pred_weight_table.luma_weight_l0_flag[i];
> + slice_param->WPInfo.chroma_weight_l0_flag[i] =
> sh->sh_pred_weight_table.chroma_weight_l0_flag[i];
> + slice_param->WPInfo.delta_luma_weight_l0[i] =
> sh->sh_pred_weight_table.delta_luma_weight_l0[i];
> + slice_param->WPInfo.luma_offset_l0[i] =
> sh->sh_pred_weight_table.luma_offset_l0[i];
> + slice_param->WPInfo.luma_weight_l1_flag[i] =
> sh->sh_pred_weight_table.luma_weight_l1_flag[i];
> + slice_param->WPInfo.chroma_weight_l1_flag[i] =
> sh->sh_pred_weight_table.chroma_weight_l1_flag[i];
> + slice_param->WPInfo.delta_luma_weight_l1[i] =
> sh->sh_pred_weight_table.delta_luma_weight_l1[i];
> + slice_param->WPInfo.luma_offset_l1[i] =
> sh->sh_pred_weight_table.luma_offset_l1[i];
> + }
> +
> + for (i = 0; i < 15; i++) {
> + for (int j = 0; j < 2; j++) {
> + slice_param->WPInfo.delta_chroma_weight_l0[i][j] =
> sh->sh_pred_weight_table.delta_chroma_weight_l0[i][j];
> + slice_param->WPInfo.delta_chroma_offset_l0[i][j] =
> sh->sh_pred_weight_table.delta_chroma_offset_l0[i][j];
> + slice_param->WPInfo.delta_chroma_weight_l1[i][j] =
> sh->sh_pred_weight_table.delta_chroma_weight_l1[i][j];
> + slice_param->WPInfo.delta_chroma_offset_l1[i][j] =
> sh->sh_pred_weight_table.delta_chroma_offset_l1[i][j];
> + }
> + }
> +
> + err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic,
> + &pic->slice_param,
> +
> sizeof(VASliceParameterBufferVVC),
> + buffer, size);
> + if (err) {
> + ff_vaapi_decode_cancel(avctx, &pic->pic);
> + return err;
> + }
> +
> + return 0;
> +}
> +
> +static int vaapi_vvc_end_frame(AVCodecContext *avctx)
> +{
> +
> + const VVCContext *h = avctx->priv_data;
> + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) %
> h->nb_fcs];
> + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
> + int ret;
> +
> + ret = ff_vaapi_decode_issue(avctx, &pic->pic);
> + if (ret < 0)
> + goto fail;
> +
> + pic->decode_issued = 1;
> +
> + return 0;
> +
> +fail:
> + ff_vaapi_decode_cancel(avctx, &pic->pic);
> + return ret;
> +}
> +
> +const FFHWAccel ff_vvc_vaapi_hwaccel = {
> + .p.name = "vvc_vaapi",
> + .p.type = AVMEDIA_TYPE_VIDEO,
> + .p.id = AV_CODEC_ID_VVC,
> + .p.pix_fmt = AV_PIX_FMT_VAAPI,
> + .start_frame = &vaapi_vvc_start_frame,
> + .end_frame = &vaapi_vvc_end_frame,
> + .decode_slice = &vaapi_vvc_decode_slice,
> + .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC),
> + .init = &ff_vaapi_decode_init,
> + .uninit = &ff_vaapi_decode_uninit,
> + .frame_params = &ff_vaapi_common_frame_params,
> + .priv_data_size = sizeof(VAAPIDecodeContext),
> + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> +};
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 06631ffa8c..7aa95fc3f1 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -29,7 +29,7 @@
>
> #include "version_major.h"
>
> -#define LIBAVCODEC_VERSION_MINOR 4
> +#define LIBAVCODEC_VERSION_MINOR 5
> #define LIBAVCODEC_VERSION_MICRO 100
>
> #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
> index f2e269ce76..b204a0c73a 100644
> --- a/libavcodec/vvc/vvcdec.c
> +++ b/libavcodec/vvc/vvcdec.c
> @@ -29,6 +29,7 @@
> #include "libavutil/cpu.h"
> #include "libavutil/thread.h"
>
> +#include "config_components.h"
> #include "vvcdec.h"
> #include "vvc_ctu.h"
> #include "vvc_data.h"
> @@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc, VVCContext
> *s, VVCFrameContext *fc,
>
> static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS
> *sps)
> {
> -#define HWACCEL_MAX 0
> +#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL
>
> enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
>
> switch (sps->pix_fmt) {
> case AV_PIX_FMT_YUV420P:
> +#if CONFIG_VVC_VAAPI_HWACCEL
> + *fmt++ = AV_PIX_FMT_VAAPI;
> +#endif
> break;
> case AV_PIX_FMT_YUV420P10:
> +#if CONFIG_VVC_VAAPI_HWACCEL
> + *fmt++ = AV_PIX_FMT_VAAPI;
> +#endif
> break;
> }
>
> @@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = {
> .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING |
> FF_CODEC_CAP_INIT_CLEANUP |
> FF_CODEC_CAP_AUTO_THREADS,
> .p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles),
> + .hw_configs = (const AVCodecHWConfigInternal *const []) {
> +#if CONFIG_VVC_VAAPI_HWACCEL
> + HWACCEL_VAAPI(vvc),
> +#endif
> + NULL
> + },
> };
> --
> 2.25.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder
2024-04-02 12:48 ` Nuo Mi
@ 2024-04-03 3:31 ` Wang, Fei W
2024-04-06 5:03 ` Nuo Mi
0 siblings, 1 reply; 14+ messages in thread
From: Wang, Fei W @ 2024-04-03 3:31 UTC (permalink / raw)
To: nuomi2021, ffmpeg-devel
On Tue, 2024-04-02 at 20:48 +0800, Nuo Mi wrote:
>
>
> On Thu, Mar 28, 2024 at 9:27 AM <fei.w.wang-at-intel.com@ffmpeg.org>
> wrote:
> > From: Fei Wang <fei.w.wang@intel.com>
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> > Changelog | 4 +
> > configure | 3 +
> > libavcodec/Makefile | 1 +
> > libavcodec/hwaccels.h | 1 +
> > libavcodec/vaapi_decode.c | 7 +
> > libavcodec/vaapi_vvc.c | 657
> > ++++++++++++++++++++++++++++++++++++++
> > libavcodec/version.h | 2 +-
> > libavcodec/vvc/vvcdec.c | 15 +-
> > 8 files changed, 688 insertions(+), 2 deletions(-)
> > create mode 100644 libavcodec/vaapi_vvc.c
> >
> > diff --git a/Changelog b/Changelog
> > index e83a00e35c..3108e65558 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -1,6 +1,10 @@
> > Entries are sorted chronologically from oldest to youngest within
> > each release,
> > releases are sorted from youngest to oldest.
> >
> > +version <next>:
> > +- VVC VAAPI decoder
> > +
> > +
> > version 7.0:
> > - DXV DXT1 encoder
> > - LEAD MCMP decoder
> > diff --git a/configure b/configure
> > index 2a1d22310b..d902c9adc8 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau
> > VdpPictureInfoVP9"
> > vp9_vdpau_hwaccel_select="vp9_decoder"
> > vp9_videotoolbox_hwaccel_deps="videotoolbox"
> > vp9_videotoolbox_hwaccel_select="vp9_decoder"
> > +vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC"
> > +vvc_vaapi_hwaccel_select="vvc_decoder"
> > wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
> > wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel"
> > wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel"
> > @@ -7250,6 +7252,7 @@ if enabled vaapi; then
> > check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0,
> > 0)"
> >
> > check_type "va/va.h va/va_dec_hevc.h"
> > "VAPictureParameterBufferHEVC"
> > + check_type "va/va.h va/va_dec_vvc.h"
> > "VAPictureParameterBufferVVC"
> > check_struct "va/va.h" "VADecPictureParameterBufferVP9"
> > bit_depth
> > check_struct "va/va.h" "VADecPictureParameterBufferAV1"
> > bit_depth_idx
> > check_type "va/va.h va/va_vpp.h"
> > "VAProcFilterParameterBufferHDRToneMapping"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 9ce6d445c1..343b037636 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) +=
> > vaapi_vp9.o
> > OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
> > OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
> > OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
> > +OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
> >
> > # Objects duplicated from other libraries for shared builds
> > SHLIBOBJS += log2_tab.o reverse.o
> > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> > index 5171e4c7d7..88d6b9a9b5 100644
> > --- a/libavcodec/hwaccels.h
> > +++ b/libavcodec/hwaccels.h
> > @@ -82,6 +82,7 @@ extern const struct FFHWAccel
> > ff_vp9_nvdec_hwaccel;
> > extern const struct FFHWAccel ff_vp9_vaapi_hwaccel;
> > extern const struct FFHWAccel ff_vp9_vdpau_hwaccel;
> > extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel;
> > +extern const struct FFHWAccel ff_vvc_vaapi_hwaccel;
> > extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel;
> > extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel;
> > extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel;
> > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
> > index 1b1972a2a9..ceeb1f1a12 100644
> > --- a/libavcodec/vaapi_decode.c
> > +++ b/libavcodec/vaapi_decode.c
> > @@ -455,6 +455,9 @@ static const struct {
> > MAP(AV1, AV1_MAIN, AV1Profile0),
> > MAP(AV1, AV1_HIGH, AV1Profile1),
> > #endif
> > +#if VA_CHECK_VERSION(1, 22, 0)
> > + MAP(H266, VVC_MAIN_10, VVCMain10),
> > +#endif
> >
> > #undef MAP
> > };
> > @@ -627,6 +630,10 @@ static int
> > vaapi_decode_make_config(AVCodecContext *avctx,
> > case AV_CODEC_ID_VP8:
> > frames->initial_pool_size += 3;
> > break;
> > + case AV_CODEC_ID_H266:
> > + // Add additional 16 for maximum 16 frames delay in
> > vvc native decode.
> > + frames->initial_pool_size += 32;
>
> One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames
> amount to approximately 6GB. Can we dynamically allocate the buffer
> pool?
This is being handled in another thread:
https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=11316
>
> The software decoder requires a delay of 16 frames to ensure full
> utilization of CPUs. In the future, we may consider increasing this
> to 32 or even 64 frames.
> However, for hardware decoding, given that all processing occurs on
> the GPU, we do not require any delay.
The delay avoids having to sync a hardware task immediately after it is
submitted, which keeps the hardware from switching tasks frequently and
losing performance. If the number is going to increase, I'd prefer to
make it an option, with different default values for hardware and
software decoding.
Thanks
Fei
>
> > + break;
> > default:
> > frames->initial_pool_size += 2;
> > }
> > diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c
> > new file mode 100644
> > index 0000000000..6141005688
> > --- /dev/null
> > +++ b/libavcodec/vaapi_vvc.c
> > @@ -0,0 +1,657 @@
> > +/*
> > + * VVC HW decode acceleration through VA API
> > + *
> > + * Copyright (c) 2024 Intel Corporation
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General
> > Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include <va/va.h>
> > +#include <va/va_dec_vvc.h>
> > +
> > +#include "vvc/vvcdec.h"
> > +#include "vvc/vvc_refs.h"
> > +#include "hwaccel_internal.h"
> > +#include "vaapi_decode.h"
> > +
> > +typedef struct VAAPIDecodePictureVVC {
> > + VAAPIDecodePicture pic;
> > + VAPictureParameterBufferVVC pic_param;
> > + VASliceParameterBufferVVC slice_param;
> > + int decode_issued;
> > +} VAAPIDecodePictureVVC;
> > +
> > +static void init_vaapi_pic(VAPictureVVC *va_pic)
> > +{
> > + va_pic->picture_id = VA_INVALID_ID;
> > + va_pic->flags = VA_PICTURE_VVC_INVALID;
> > + va_pic->pic_order_cnt = 0;
> > +}
> > +
> > +static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame
> > *pic)
> > +{
> > + va_pic->picture_id = ff_vaapi_get_surface_id(pic->frame);
> > + va_pic->pic_order_cnt = pic->poc;
> > + va_pic->flags = 0;
> > +
> > + if (pic->flags & VVC_FRAME_FLAG_LONG_REF)
> > + va_pic->flags |= VA_PICTURE_VVC_LONG_TERM_REFERENCE;
> > +}
> > +
> > +static void fill_vaapi_reference_frames(const VVCFrameContext *h,
> > VAPictureParameterBufferVVC *pp)
> > +{
> > + const VVCFrame *current_picture = h->ref;
> > + int i, j;
> > +
> > + for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames);
> > i++) {
> > + const VVCFrame *frame = NULL;
> > +
> > + while (!frame && j < FF_ARRAY_ELEMS(h->DPB)) {
> > + if ((&h->DPB[j] != current_picture ) &&
> > + (h->DPB[j].flags & (VVC_FRAME_FLAG_LONG_REF |
> > VVC_FRAME_FLAG_SHORT_REF)))
> > + frame = &h->DPB[j];
> > + j++;
> > + }
> > +
> > + init_vaapi_pic(&pp->ReferenceFrames[i]);
> > +
> > + if (frame) {
> > + VAAPIDecodePictureVVC *pic;
> > + fill_vaapi_pic(&pp->ReferenceFrames[i], frame);
> > + pic = frame->hwaccel_picture_private;
> > + if (!pic->decode_issued)
> > + pp->ReferenceFrames[i].flags |=
> > VA_PICTURE_VVC_UNAVAILABLE_REFERENCE;
> > + }
> > + }
> > +}
> > +
> > +static int vaapi_vvc_start_frame(AVCodecContext *avctx,
> > + av_unused const uint8_t *buffer,
> > + av_unused uint32_t size)
> > +{
> > + const VVCContext *h = avctx->priv_data;
> > + VVCFrameContext *fc = &h->fcs[(h->nb_frames
> > + h->nb_fcs) % h->nb_fcs];
> > + const H266RawSPS *sps = fc->ps.sps->r;
> > + const H266RawPPS *pps = fc->ps.pps->r;
> > + const H266RawPictureHeader *ph = fc->ps.ph.r;
> > + VAAPIDecodePictureVVC *pic = fc->ref-
> > >hwaccel_picture_private;
> > + VAPictureParameterBufferVVC *pic_param = &pic->pic_param;
> > + uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] =
> > {0};
> > + int i, j, k, err;
> > +
> > + pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref-
> > >frame);
> > +
> > + *pic_param = (VAPictureParameterBufferVVC) {
> > + .pps_pic_width_in_luma_samples = pps-
> > >pps_pic_width_in_luma_samples,
> > + .pps_pic_height_in_luma_samples = pps-
> > >pps_pic_height_in_luma_samples,
> > + .sps_num_subpics_minus1 = sps-
> > >sps_num_subpics_minus1,
> > + .sps_chroma_format_idc = sps-
> > >sps_chroma_format_idc,
> > + .sps_bitdepth_minus8 = sps-
> > >sps_bitdepth_minus8,
> > + .sps_log2_ctu_size_minus5 = sps-
> > >sps_log2_ctu_size_minus5,
> > + .sps_log2_min_luma_coding_block_size_minus2 = sps-
> > >sps_log2_min_luma_coding_block_size_minus2,
> > + .sps_log2_transform_skip_max_size_minus2 = sps-
> > >sps_log2_transform_skip_max_size_minus2,
> > + .sps_six_minus_max_num_merge_cand = sps-
> > >sps_six_minus_max_num_merge_cand,
> > + .sps_five_minus_max_num_subblock_merge_cand = sps-
> > >sps_five_minus_max_num_subblock_merge_cand,
> > + .sps_max_num_merge_cand_minus_max_num_gpm_cand = sps-
> > >sps_max_num_merge_cand_minus_max_num_gpm_cand,
> > + .sps_log2_parallel_merge_level_minus2 = sps-
> > >sps_log2_parallel_merge_level_minus2,
> > + .sps_min_qp_prime_ts = sps-
> > >sps_min_qp_prime_ts,
> > + .sps_six_minus_max_num_ibc_merge_cand = sps-
> > >sps_six_minus_max_num_ibc_merge_cand,
> > + .sps_num_ladf_intervals_minus2 = sps-
> > >sps_num_ladf_intervals_minus2,
> > + .sps_ladf_lowest_interval_qp_offset = sps-
> > >sps_ladf_lowest_interval_qp_offset,
> > + .sps_flags.bits = {
> > + .sps_subpic_info_present_flag
> > = sps->sps_subpic_info_present_flag,
> > + .sps_independent_subpics_flag
> > = sps->sps_independent_subpics_flag,
> > + .sps_subpic_same_size_flag
> > = sps->sps_subpic_same_size_flag,
> > + .sps_entropy_coding_sync_enabled_flag
> > = sps->sps_entropy_coding_sync_enabled_flag,
> > + .sps_qtbtt_dual_tree_intra_flag
> > = sps->sps_qtbtt_dual_tree_intra_flag,
> > + .sps_max_luma_transform_size_64_flag
> > = sps->sps_max_luma_transform_size_64_flag,
> > + .sps_transform_skip_enabled_flag
> > = sps->sps_transform_skip_enabled_flag,
> > + .sps_bdpcm_enabled_flag
> > = sps->sps_bdpcm_enabled_flag,
> > + .sps_mts_enabled_flag
> > = sps->sps_mts_enabled_flag,
> > + .sps_explicit_mts_intra_enabled_flag
> > = sps->sps_explicit_mts_intra_enabled_flag,
> > + .sps_explicit_mts_inter_enabled_flag
> > = sps->sps_explicit_mts_inter_enabled_flag,
> > + .sps_lfnst_enabled_flag
> > = sps->sps_lfnst_enabled_flag,
> > + .sps_joint_cbcr_enabled_flag
> > = sps->sps_joint_cbcr_enabled_flag,
> > + .sps_same_qp_table_for_chroma_flag
> > = sps->sps_same_qp_table_for_chroma_flag,
> > + .sps_sao_enabled_flag
> > = sps->sps_sao_enabled_flag,
> > + .sps_alf_enabled_flag
> > = sps->sps_alf_enabled_flag,
> > + .sps_ccalf_enabled_flag
> > = sps->sps_alf_enabled_flag,
> > + .sps_lmcs_enabled_flag
> > = sps->sps_lmcs_enabled_flag,
> > + .sps_sbtmvp_enabled_flag
> > = sps->sps_sbtmvp_enabled_flag,
> > + .sps_amvr_enabled_flag
> > = sps->sps_amvr_enabled_flag,
> > + .sps_smvd_enabled_flag
> > = sps->sps_smvd_enabled_flag,
> > + .sps_mmvd_enabled_flag
> > = sps->sps_mmvd_enabled_flag,
> > + .sps_sbt_enabled_flag
> > = sps->sps_sbt_enabled_flag,
> > + .sps_affine_enabled_flag
> > = sps->sps_affine_enabled_flag,
> > + .sps_6param_affine_enabled_flag
> > = sps->sps_6param_affine_enabled_flag,
> > + .sps_affine_amvr_enabled_flag
> > = sps->sps_affine_amvr_enabled_flag,
> > + .sps_affine_prof_enabled_flag
> > = sps->sps_affine_prof_enabled_flag,
> > + .sps_bcw_enabled_flag
> > = sps->sps_bcw_enabled_flag,
> > + .sps_ciip_enabled_flag
> > = sps->sps_ciip_enabled_flag,
> > + .sps_gpm_enabled_flag
> > = sps->sps_gpm_enabled_flag,
> > + .sps_isp_enabled_flag
> > = sps->sps_isp_enabled_flag,
> > + .sps_mrl_enabled_flag
> > = sps->sps_mrl_enabled_flag,
> > + .sps_mip_enabled_flag
> > = sps->sps_mip_enabled_flag,
> > + .sps_cclm_enabled_flag
> > = sps->sps_cclm_enabled_flag,
> > + .sps_chroma_horizontal_collocated_flag
> > = sps->sps_chroma_horizontal_collocated_flag,
> > + .sps_chroma_vertical_collocated_flag
> > = sps->sps_chroma_vertical_collocated_flag,
> > + .sps_palette_enabled_flag
> > = sps->sps_palette_enabled_flag,
> > + .sps_act_enabled_flag
> > = sps->sps_act_enabled_flag,
> > + .sps_ibc_enabled_flag
> > = sps->sps_ibc_enabled_flag,
> > + .sps_ladf_enabled_flag
> > = sps->sps_ladf_enabled_flag,
> > + .sps_explicit_scaling_list_enabled_flag
> > = sps->sps_explicit_scaling_list_enabled_flag,
> > + .sps_scaling_matrix_for_lfnst_disabled_flag
> > = sps->sps_scaling_matrix_for_lfnst_disabled_flag,
> > +
> > .sps_scaling_matrix_for_alternative_colour_space_disabled_flag =
> > sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag,
> > + .sps_scaling_matrix_designated_colour_space_flag
> > = sps->sps_scaling_matrix_designated_colour_space_flag,
> > + .sps_virtual_boundaries_enabled_flag
> > = sps->sps_virtual_boundaries_enabled_flag,
> > + .sps_virtual_boundaries_present_flag
> > = sps->sps_virtual_boundaries_present_flag,
> > + },
> > + .NumVerVirtualBoundaries = sps-
> > >sps_virtual_boundaries_present_flag ?
> > + sps-
> > >sps_num_ver_virtual_boundaries :
> > + ph-
> > >ph_num_ver_virtual_boundaries,
> > + .NumHorVirtualBoundaries = sps-
> > >sps_virtual_boundaries_present_flag ?
> > + sps-
> > >sps_num_hor_virtual_boundaries :
> > + ph-
> > >ph_num_hor_virtual_boundaries,
> > + .pps_scaling_win_left_offset = pps-
> > >pps_scaling_win_left_offset,
> > + .pps_scaling_win_right_offset = pps-
> > >pps_scaling_win_right_offset,
> > + .pps_scaling_win_top_offset = pps-
> > >pps_scaling_win_top_offset,
> > + .pps_scaling_win_bottom_offset = pps-
> > >pps_scaling_win_bottom_offset,
> > + .pps_num_exp_tile_columns_minus1 = pps-
> > >pps_num_exp_tile_columns_minus1,
> > + .pps_num_exp_tile_rows_minus1 = pps-
> > >pps_num_exp_tile_rows_minus1,
> > + .pps_num_slices_in_pic_minus1 = pps-
> > >pps_num_slices_in_pic_minus1,
> > + .pps_pic_width_minus_wraparound_offset = pps-
> > >pps_pic_width_minus_wraparound_offset,
> > + .pps_cb_qp_offset = pps-
> > >pps_cb_qp_offset,
> > + .pps_cr_qp_offset = pps-
> > >pps_cr_qp_offset,
> > + .pps_joint_cbcr_qp_offset_value = pps-
> > >pps_joint_cbcr_qp_offset_value,
> > + .pps_chroma_qp_offset_list_len_minus1 = pps-
> > >pps_chroma_qp_offset_list_len_minus1,
> > + .pps_flags.bits = {
> > + .pps_loop_filter_across_tiles_enabled_flag = pps-
> > >pps_loop_filter_across_tiles_enabled_flag,
> > + .pps_rect_slice_flag = pps-
> > >pps_rect_slice_flag,
> > + .pps_single_slice_per_subpic_flag = pps-
> > >pps_single_slice_per_subpic_flag,
> > + .pps_loop_filter_across_slices_enabled_flag = pps-
> > >pps_loop_filter_across_slices_enabled_flag,
> > + .pps_weighted_pred_flag = pps-
> > >pps_weighted_pred_flag,
> > + .pps_weighted_bipred_flag = pps-
> > >pps_weighted_bipred_flag,
> > + .pps_ref_wraparound_enabled_flag = pps-
> > >pps_ref_wraparound_enabled_flag,
> > + .pps_cu_qp_delta_enabled_flag = pps-
> > >pps_cu_qp_delta_enabled_flag,
> > + .pps_cu_chroma_qp_offset_list_enabled_flag = pps-
> > >pps_cu_chroma_qp_offset_list_enabled_flag,
> > + .pps_deblocking_filter_override_enabled_flag = pps-
> > >pps_deblocking_filter_override_enabled_flag,
> > + .pps_deblocking_filter_disabled_flag = pps-
> > >pps_deblocking_filter_disabled_flag,
> > + .pps_dbf_info_in_ph_flag = pps-
> > >pps_dbf_info_in_ph_flag,
> > + .pps_sao_info_in_ph_flag = pps-
> > >pps_sao_info_in_ph_flag,
> > + .pps_alf_info_in_ph_flag = pps-
> > >pps_alf_info_in_ph_flag,
> > + },
> > + .ph_lmcs_aps_id = ph-
> > >ph_lmcs_aps_id,
> > + .ph_scaling_list_aps_id = ph-
> > >ph_scaling_list_aps_id,
> > + .ph_log2_diff_min_qt_min_cb_intra_slice_luma = ph-
> > >ph_log2_diff_min_qt_min_cb_intra_slice_luma,
> > + .ph_max_mtt_hierarchy_depth_intra_slice_luma = ph-
> > >ph_max_mtt_hierarchy_depth_intra_slice_luma,
> > + .ph_log2_diff_max_bt_min_qt_intra_slice_luma = ph-
> > >ph_log2_diff_max_bt_min_qt_intra_slice_luma,
> > + .ph_log2_diff_max_tt_min_qt_intra_slice_luma = ph-
> > >ph_log2_diff_max_tt_min_qt_intra_slice_luma,
> > + .ph_log2_diff_min_qt_min_cb_intra_slice_chroma = ph-
> > >ph_log2_diff_min_qt_min_cb_intra_slice_chroma,
> > + .ph_max_mtt_hierarchy_depth_intra_slice_chroma = ph-
> > >ph_max_mtt_hierarchy_depth_intra_slice_chroma,
> > + .ph_log2_diff_max_bt_min_qt_intra_slice_chroma = ph-
> > >ph_log2_diff_max_bt_min_qt_intra_slice_chroma,
> > + .ph_log2_diff_max_tt_min_qt_intra_slice_chroma = ph-
> > >ph_log2_diff_max_tt_min_qt_intra_slice_chroma,
> > + .ph_cu_qp_delta_subdiv_intra_slice = ph-
> > >ph_cu_qp_delta_subdiv_intra_slice,
> > + .ph_cu_chroma_qp_offset_subdiv_intra_slice = ph-
> > >ph_cu_chroma_qp_offset_subdiv_intra_slice,
> > + .ph_log2_diff_min_qt_min_cb_inter_slice = ph-
> > >ph_log2_diff_min_qt_min_cb_inter_slice,
> > + .ph_max_mtt_hierarchy_depth_inter_slice = ph-
> > >ph_max_mtt_hierarchy_depth_inter_slice,
> > + .ph_log2_diff_max_bt_min_qt_inter_slice = ph-
> > >ph_log2_diff_max_bt_min_qt_inter_slice,
> > + .ph_log2_diff_max_tt_min_qt_inter_slice = ph-
> > >ph_log2_diff_max_tt_min_qt_inter_slice,
> > + .ph_cu_qp_delta_subdiv_inter_slice = ph-
> > >ph_cu_qp_delta_subdiv_inter_slice,
> > + .ph_cu_chroma_qp_offset_subdiv_inter_slice = ph-
> > >ph_cu_chroma_qp_offset_subdiv_inter_slice,
> > + .ph_flags.bits= {
> > + .ph_non_ref_pic_flag = ph-
> > >ph_non_ref_pic_flag,
> > + .ph_alf_enabled_flag = ph-
> > >ph_alf_enabled_flag,
> > + .ph_alf_cb_enabled_flag = ph-
> > >ph_alf_cb_enabled_flag,
> > + .ph_alf_cr_enabled_flag = ph-
> > >ph_alf_cr_enabled_flag,
> > + .ph_alf_cc_cb_enabled_flag = ph-
> > >ph_alf_cc_cb_enabled_flag,
> > + .ph_alf_cc_cr_enabled_flag = ph-
> > >ph_alf_cc_cr_enabled_flag,
> > + .ph_lmcs_enabled_flag = ph-
> > >ph_lmcs_enabled_flag,
> > + .ph_chroma_residual_scale_flag = ph-
> > >ph_chroma_residual_scale_flag,
> > + .ph_explicit_scaling_list_enabled_flag = ph-
> > >ph_explicit_scaling_list_enabled_flag,
> > + .ph_virtual_boundaries_present_flag = ph-
> > >ph_virtual_boundaries_present_flag,
> > + .ph_temporal_mvp_enabled_flag = ph-
> > >ph_temporal_mvp_enabled_flag,
> > + .ph_mmvd_fullpel_only_flag = ph-
> > >ph_mmvd_fullpel_only_flag,
> > + .ph_mvd_l1_zero_flag = ph-
> > >ph_mvd_l1_zero_flag,
> > + .ph_bdof_disabled_flag = ph-
> > >ph_bdof_disabled_flag,
> > + .ph_dmvr_disabled_flag = ph-
> > >ph_dmvr_disabled_flag,
> > + .ph_prof_disabled_flag = ph-
> > >ph_prof_disabled_flag,
> > + .ph_joint_cbcr_sign_flag = ph-
> > >ph_joint_cbcr_sign_flag,
> > + .ph_sao_luma_enabled_flag = ph-
> > >ph_sao_luma_enabled_flag,
> > + .ph_sao_chroma_enabled_flag = ph-
> > >ph_sao_chroma_enabled_flag,
> > + .ph_deblocking_filter_disabled_flag = ph-
> > >ph_deblocking_filter_disabled_flag,
> > + },
> > + .PicMiscFlags.fields = {
> > + .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag
> > ? 0 : IS_IRAP(h) ? 1 : 0,
> > + }
> > + };
> > +
> > + fill_vaapi_pic(&pic_param->CurrPic, fc->ref);
> > + fill_vaapi_reference_frames(fc, pic_param);
> > +
> > + for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++)
> > + for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++)
> > + pic_param->ChromaQpTable[i][j] = fc->ps.sps-
> > >chroma_qp_table[i][j];
> > + for (i = 0; i < 4; i++) {
> > + pic_param->sps_ladf_qp_offset[i] = sps-
> > >sps_ladf_qp_offset[i];
> > + pic_param->sps_ladf_delta_threshold_minus1[i] = sps-
> > >sps_ladf_delta_threshold_minus1[i];
> > + }
> > +
> > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ?
> > sps->sps_num_ver_virtual_boundaries : ph-
> > >ph_num_ver_virtual_boundaries); i++) {
> > + pic_param->VirtualBoundaryPosX[i] = (sps-
> > >sps_virtual_boundaries_present_flag ?
> > + (sps-
> > >sps_virtual_boundary_pos_x_minus1[ i ] + 1) :
> > + (ph-
> > >ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8;
> > + }
> > +
> > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ?
> > sps->sps_num_hor_virtual_boundaries : ph-
> > >ph_num_hor_virtual_boundaries); i++) {
> > + pic_param->VirtualBoundaryPosY[i] = (sps-
> > >sps_virtual_boundaries_present_flag ?
> > + (sps-
> > >sps_virtual_boundary_pos_y_minus1[ i ] + 1) :
> > + (ph-
> > >ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8;
> > + }
> > +
> > + for (i = 0; i < 6; i++) {
> > + pic_param->pps_cb_qp_offset_list[i] = pps-
> > >pps_cb_qp_offset_list[i];
> > + pic_param->pps_cr_qp_offset_list[i] = pps-
> > >pps_cr_qp_offset_list[i];
> > + pic_param->pps_joint_cbcr_qp_offset_list[i] = pps-
> > >pps_joint_cbcr_qp_offset_list[i];
> > + }
> > +
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> > +
> > VAPictureParameterBufferType,
> > + &pic->pic_param,
> > sizeof(VAPictureParameterBufferVVC));
> > + if (err < 0)
> > + goto fail;
> > +
> > + for (i = 0; i <= sps->sps_num_subpics_minus1 && sps-
> > >sps_subpic_info_present_flag; i++) {
> > + VASubPicVVC subpic_param = {
> > + .sps_subpic_ctu_top_left_x = sps-
> > >sps_subpic_ctu_top_left_x[i],
> > + .sps_subpic_ctu_top_left_y = sps-
> > >sps_subpic_ctu_top_left_y[i],
> > + .sps_subpic_width_minus1 = sps-
> > >sps_subpic_width_minus1[i],
> > + .sps_subpic_height_minus1 = sps-
> > >sps_subpic_height_minus1[i],
> > + .SubpicIdVal = pps->sub_pic_id_val[i],
> > + .subpic_flags.bits = {
> > + .sps_subpic_treated_as_pic_flag = sps-
> > >sps_subpic_treated_as_pic_flag[i],
> > + .sps_loop_filter_across_subpic_enabled_flag = sps-
> > >sps_loop_filter_across_subpic_enabled_flag[i],
> > + }
> > + };
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> > +
> > VASubPicBufferType,
> > + &subpic_param,
> > sizeof(VASubPicVVC));
> > + if (err < 0)
> > + goto fail;
> > + }
> > +
> > + for (i = 0; i < VVC_MAX_ALF_COUNT; i++) {
> > + const VVCALF *alf_list = h->ps.alf_list[i];
> > + if (alf_list) {
> > + const H266RawAPS *alf = alf_list->r;
> > + VAAlfDataVVC alf_param = {
> > + .aps_adaptation_parameter_set_id = i,
> > + .alf_luma_num_filters_signalled_minus1 = alf-
> > >alf_luma_num_filters_signalled_minus1,
> > + .alf_chroma_num_alt_filters_minus1 = alf-
> > >alf_chroma_num_alt_filters_minus1,
> > + .alf_cc_cb_filters_signalled_minus1 = alf-
> > >alf_cc_cb_filters_signalled_minus1,
> > + .alf_cc_cr_filters_signalled_minus1 = alf-
> > >alf_cc_cr_filters_signalled_minus1,
> > + .alf_flags.bits = {
> > + .alf_luma_filter_signal_flag = alf-
> > >alf_luma_filter_signal_flag,
> > + .alf_chroma_filter_signal_flag = alf-
> > >alf_chroma_filter_signal_flag,
> > + .alf_cc_cb_filter_signal_flag = alf-
> > >alf_cc_cb_filter_signal_flag,
> > + .alf_cc_cr_filter_signal_flag = alf-
> > >alf_cc_cr_filter_signal_flag,
> > + .alf_luma_clip_flag = alf-
> > >alf_luma_clip_flag,
> > + .alf_chroma_clip_flag = alf-
> > >alf_chroma_clip_flag,
> > + }
> > + };
> > +
> > + for (j = 0; j < 25; j++)
> > + alf_param.alf_luma_coeff_delta_idx[j] = alf-
> > >alf_luma_coeff_delta_idx[j];
> > +
> > + for (j = 0; j < 25; j++) {
> > + for (k = 0; k < 12; k++) {
> > + alf_param.filtCoeff[j][k] = alf-
> > >alf_luma_coeff_abs[j][k] * (1 - 2 * alf-
> > >alf_luma_coeff_sign[j][k]);
> > + alf_param.alf_luma_clip_idx[j][k] = alf-
> > >alf_luma_clip_idx[j][k];
> > + }
> > + }
> > +
> > + for (j = 0; j < 8; j++) {
> > + for (k = 0; k < 6; k++) {
> > + alf_param.AlfCoeffC[j][k] = alf-
> > >alf_chroma_coeff_abs[j][k] * (1 - 2 * alf-
> > >alf_chroma_coeff_sign[j][k]);
> > + alf_param.alf_chroma_clip_idx[j][k] = alf-
> > >alf_chroma_clip_idx[j][k];
> > + }
> > + }
> > +
> > + for (j = 0; j < 4; j++) {
> > + for (k = 0; k < 7; k++) {
> > + if (alf->alf_cc_cb_mapped_coeff_abs[j][k])
> > + alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 *
> > alf->alf_cc_cb_coeff_sign[j][k]) * (1 << (alf-
> > >alf_cc_cb_mapped_coeff_abs[j][k] - 1));
> > + if (alf->alf_cc_cr_mapped_coeff_abs[j][k])
> > + alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 *
> > alf->alf_cc_cr_coeff_sign[j][k]) * (1 << (alf-
> > >alf_cc_cr_mapped_coeff_abs[j][k] - 1));
> > + }
> > + }
> > +
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic-
> > >pic,
> > +
> > VAAlfBufferType,
> > + &alf_param,
> > sizeof(VAAlfDataVVC));
> > + if (err < 0)
> > + goto fail;
> > + }
> > + }
> > +
> > + for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) {
> > + const H266RawAPS *lmcs = h->ps.lmcs_list[i];
> > + if (lmcs) {
> > + VALmcsDataVVC lmcs_param = {
> > + .aps_adaptation_parameter_set_id = i,
> > + .lmcs_min_bin_idx = lmcs-
> > >lmcs_min_bin_idx,
> > + .lmcs_delta_max_bin_idx = lmcs-
> > >lmcs_delta_max_bin_idx,
> > + .lmcsDeltaCrs = (1 - 2 * lmcs-
> > >lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs,
> > + };
> > +
> > + for (j = lmcs->lmcs_min_bin_idx; j <= 15 - lmcs-
> > >lmcs_delta_max_bin_idx; j++)
> > + lmcs_param.lmcsDeltaCW[j] = (1 - 2 * lmcs-
> > >lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j];
> > +
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic-
> > >pic,
> > +
> > VALmcsBufferType,
> > + &lmcs_param,
> > sizeof(VALmcsDataVVC));
> > + if (err < 0)
> > + goto fail;
> > + }
> > + }
> > +
> > + for (i = 0; i < VVC_MAX_SL_COUNT; i++) {
> > + const VVCScalingList *sl = h->ps.scaling_list[i];
> > + if (sl) {
> > + int l;
> > +
> > + VAScalingListVVC sl_param = {
> > + .aps_adaptation_parameter_set_id = i,
> > + };
> > +
> > + for (j = 0; j < 14; j++)
> > + sl_param.ScalingMatrixDCRec[j] = sl-
> > >scaling_matrix_dc_rec[j];
> > +
> > + for (j = 0; j < 2; j++)
> > + for (k = 0; k < 2; k++)
> > + for (l = 0; l < 2; l++)
> > + sl_param.ScalingMatrixRec2x2[j][k][l] =
> > sl->scaling_matrix_rec[j][l * 2 + k];
> > +
> > + for (j = 2; j < 8; j++)
> > + for (k = 0; k < 4; k++)
> > + for (l = 0; l < 4; l++)
> > + sl_param.ScalingMatrixRec4x4[j - 2][k][l]
> > = sl->scaling_matrix_rec[j][l * 4 + k];
> > +
> > + for (j = 8; j < 28; j++)
> > + for (k = 0; k < 8; k++)
> > + for (l = 0; l < 8; l++)
> > + sl_param.ScalingMatrixRec8x8[j - 8][k][l]
> > = sl->scaling_matrix_rec[j][l * 8 + k];
> > +
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic-
> > >pic,
> > +
> > VAIQMatrixBufferType,
> > + &sl_param,
> > sizeof(VAScalingListVVC));
> > + if (err < 0)
> > + goto fail;
> > + }
> > + }
> > +
> > + for (i = 0; i <= pps->pps_num_exp_tile_columns_minus1; i++) {
> > + tile_dim = pps->pps_tile_column_width_minus1[i];
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> > + VATileBufferType,
> > + &tile_dim,
> > sizeof(tile_dim));
> > + if (err < 0)
> > + goto fail;
> > + }
> > +
> > + for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) {
> > + tile_dim = pps->pps_tile_row_height_minus1[i];
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic,
> > + VATileBufferType,
> > + &tile_dim,
> > sizeof(tile_dim));
> > + if (err < 0)
> > + goto fail;
> > + }
> > +
> > + if (!pps->pps_no_pic_partition_flag && pps-
> > >pps_rect_slice_flag && !pps->pps_single_slice_per_subpic_flag) {
> > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
> > + for (j = 0; j < pps->pps_num_exp_slices_in_tile[i];
> > j++) {
> > + exp_slice_height_in_ctus[i + j] = pps-
> > >pps_exp_slice_height_in_ctus_minus1[i][j] + 1;
> > + }
> > + }
> > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) {
> > + VASliceStructVVC ss_param = {
> > + .SliceTopLeftTileIdx = pps-
> > >slice_top_left_tile_idx[i],
> > + .pps_slice_width_in_tiles_minus1 = pps-
> > >pps_slice_width_in_tiles_minus1[i],
> > + .pps_slice_height_in_tiles_minus1 = pps-
> > >pps_slice_height_in_tiles_minus1[i],
> > + };
> > +
> > + if (pps->pps_slice_width_in_tiles_minus1[i] > 0 ||
> > pps->pps_slice_height_in_tiles_minus1[i] > 0)
> > + ss_param.pps_exp_slice_height_in_ctus_minus1 = 0;
> > + else {
> > + if (pps->num_slices_in_tile[i] == 1)
> > + ss_param.pps_exp_slice_height_in_ctus_minus1 =
> > pps->row_height_val[pps->slice_top_left_tile_idx[i] / pps-
> > >num_tile_columns] - 1;
> > + else if (exp_slice_height_in_ctus[i])
> > + ss_param.pps_exp_slice_height_in_ctus_minus1 =
> > exp_slice_height_in_ctus[i] - 1;
> > + else
> > + continue;
> > + }
> > +
> > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic-
> > >pic,
> > +
> > VASliceStructBufferType,
> > + &ss_param,
> > sizeof(VASliceStructVVC));
> > + if (err < 0)
> > + goto fail;
> > + }
> > + }
> > +
> > + return 0;
> > +
> > +fail:
> > + ff_vaapi_decode_cancel(avctx, &pic->pic);
> > + return err;
> > +}
> > +
> > +static uint8_t get_ref_pic_index(const VVCContext *h, const
> > VVCFrame *frame)
> > +{
> > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h-
> > >nb_fcs) % h->nb_fcs];
> > + VAAPIDecodePictureVVC *pic = fc->ref-
> > >hwaccel_picture_private;
> > + VAPictureParameterBufferVVC *pp = (VAPictureParameterBufferVVC
> > *)&pic->pic_param;
> > + uint8_t i;
> > +
> > + if (!frame)
> > + return 0xFF;
> > +
> > + for (i = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) {
> > + VASurfaceID pid = pp->ReferenceFrames[i].picture_id;
> > + int poc = pp->ReferenceFrames[i].pic_order_cnt;
> > + if (pid != VA_INVALID_ID && pid ==
> > ff_vaapi_get_surface_id(frame->frame) && poc == frame->poc)
> > + return i;
> > + }
> > +
> > + return 0xFF;
> > +}
> > +
> > +static int get_slice_data_offset(const uint8_t *buffer, uint32_t
> > size, const SliceContext* sc)
> > +{
> > + const H266RawSlice *slice = sc->ref;
> > + int num_identical_bytes = slice->data_size < 32 ? slice-
> > >data_size : 32;
> > +
> > + for (int i = 0; i < size; i++) {
> > + int skip_bytes = 0;
> > + if (i >=2 && buffer[i] == 0x03 && !buffer[i - 1] &&
> > !buffer[i - 2])
> > + continue;
> > +
> > + for (int j = 0; j < num_identical_bytes; j++) {
> > + if (i >= 2 && buffer[i + j + skip_bytes] == 0x03 &&
> > !buffer[i + j + skip_bytes - 1] && !buffer[i + j + skip_bytes - 2])
> > + skip_bytes++;
> > +
> > + if (buffer[i + j + skip_bytes] != slice->data[j])
> > + break;
> > +
> > + if (j + 1 == num_identical_bytes)
> > + return i;
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int vaapi_vvc_decode_slice(AVCodecContext *avctx,
> > + const uint8_t *buffer,
> > + uint32_t size)
> > +{
> > + const VVCContext *h = avctx->priv_data;
> > + VVCFrameContext *fc = &h->fcs[(h->nb_frames
> > + h->nb_fcs) % h->nb_fcs];
> > + const SliceContext *sc = fc->slices[fc-
> > >nb_slices];
> > + const H266RawPPS *pps = fc->ps.pps->r;
> > + const H266RawPictureHeader *ph = fc->ps.ph.r;
> > + const H266RawSliceHeader *sh = sc->sh.r;
> > + VAAPIDecodePictureVVC *pic = fc->ref-
> > >hwaccel_picture_private;
> > + VASliceParameterBufferVVC *slice_param = &pic->slice_param;
> > + int nb_list, i, err;
> > +
> > + *slice_param = (VASliceParameterBufferVVC) {
> > + .slice_data_size = size,
> > + .slice_data_offset = 0,
> > + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL,
> > + .slice_data_byte_offset =
> > get_slice_data_offset(buffer, size, sc),
> > + .sh_subpic_id = sh->sh_subpic_id,
> > + .sh_slice_address = sh->sh_slice_address,
> > + .sh_num_tiles_in_slice_minus1 = sh-
> > >sh_num_tiles_in_slice_minus1,
> > + .sh_slice_type = sh->sh_slice_type,
> > + .sh_num_alf_aps_ids_luma = sh-
> > >sh_num_alf_aps_ids_luma,
> > + .sh_alf_aps_id_chroma = sh-
> > >sh_alf_aps_id_chroma,
> > + .sh_alf_cc_cb_aps_id = sh-
> > >sh_alf_cc_cb_aps_id,
> > + .sh_alf_cc_cr_aps_id = sh-
> > >sh_alf_cc_cr_aps_id,
> > + .NumRefIdxActive[0] = sh-
> > >num_ref_idx_active[0],
> > + .NumRefIdxActive[1] = sh-
> > >num_ref_idx_active[1],
> > + .sh_collocated_ref_idx = sh-
> > >sh_collocated_ref_idx,
> > + .SliceQpY = pps-
> > >pps_qp_delta_info_in_ph_flag ?
> > + 26 + pps-
> > >pps_init_qp_minus26 + ph->ph_qp_delta :
> > + 26 + pps-
> > >pps_init_qp_minus26 + sh->sh_qp_delta,
> > + .sh_cb_qp_offset = sh->sh_cb_qp_offset,
> > + .sh_cr_qp_offset = sh->sh_cr_qp_offset,
> > + .sh_joint_cbcr_qp_offset = sh-
> > >sh_joint_cbcr_qp_offset,
> > + .sh_luma_beta_offset_div2 = sh-
> > >sh_luma_beta_offset_div2,
> > + .sh_luma_tc_offset_div2 = sh-
> > >sh_luma_tc_offset_div2,
> > + .sh_cb_beta_offset_div2 = sh-
> > >sh_cb_beta_offset_div2,
> > + .sh_cb_tc_offset_div2 = sh-
> > >sh_cb_tc_offset_div2,
> > + .sh_cr_beta_offset_div2 = sh-
> > >sh_cr_beta_offset_div2,
> > + .sh_cr_tc_offset_div2 = sh-
> > >sh_cr_tc_offset_div2,
> > + .WPInfo = {
> > + .luma_log2_weight_denom = sh-
> > >sh_pred_weight_table.luma_log2_weight_denom,
> > + .delta_chroma_log2_weight_denom = sh-
> > >sh_pred_weight_table.delta_chroma_log2_weight_denom,
> > + .num_l0_weights = sh-
> > >sh_pred_weight_table.num_l0_weights,
> > + .num_l1_weights = sh-
> > >sh_pred_weight_table.num_l1_weights,
> > + },
> > + .sh_flags.bits = {
> > + .sh_alf_enabled_flag = sh-
> > >sh_alf_enabled_flag,
> > + .sh_alf_cb_enabled_flag = sh-
> > >sh_alf_cb_enabled_flag,
> > + .sh_alf_cr_enabled_flag = sh-
> > >sh_alf_cr_enabled_flag,
> > + .sh_alf_cc_cb_enabled_flag = sh-
> > >sh_alf_cc_cb_enabled_flag,
> > + .sh_alf_cc_cr_enabled_flag = sh-
> > >sh_alf_cc_cr_enabled_flag,
> > + .sh_lmcs_used_flag = sh-
> > >sh_lmcs_used_flag,
> > + .sh_explicit_scaling_list_used_flag = sh-
> > >sh_explicit_scaling_list_used_flag,
> > + .sh_cabac_init_flag = sh-
> > >sh_cabac_init_flag,
> > + .sh_collocated_from_l0_flag = sh-
> > >sh_collocated_from_l0_flag,
> > + .sh_cu_chroma_qp_offset_enabled_flag = sh-
> > >sh_cu_chroma_qp_offset_enabled_flag,
> > + .sh_sao_luma_used_flag = sh-
> > >sh_sao_luma_used_flag,
> > + .sh_sao_chroma_used_flag = sh-
> > >sh_sao_chroma_used_flag,
> > + .sh_deblocking_filter_disabled_flag = sh-
> > >sh_deblocking_filter_disabled_flag,
> > + .sh_dep_quant_used_flag = sh-
> > >sh_dep_quant_used_flag,
> > + .sh_sign_data_hiding_used_flag = sh-
> > >sh_sign_data_hiding_used_flag,
> > + .sh_ts_residual_coding_disabled_flag = sh-
> > >sh_ts_residual_coding_disabled_flag,
> > + },
> > + };
> > +
> > + memset(&slice_param->RefPicList, 0xFF, sizeof(slice_param-
> > >RefPicList));
> > +
> > + nb_list = (sh->sh_slice_type == VVC_SLICE_TYPE_B) ?
> > + 2 : (sh->sh_slice_type == VVC_SLICE_TYPE_I ? 0 : 1);
> > + for (int list_idx = 0; list_idx < nb_list; list_idx++) {
> > + RefPicList *rpl = &sc->rpl[list_idx];
> > +
> > + for (i = 0; i < rpl->nb_refs; i++)
> > + slice_param->RefPicList[list_idx][i] =
> > get_ref_pic_index(h, rpl->ref[i]);
> > + }
> > +
> > + for (i = 0; i < 7; i++)
> > + slice_param->sh_alf_aps_id_luma[i] = sh-
> > >sh_alf_aps_id_luma[i];
> > +
> > + for (i = 0; i < 15; i++) {
> > + slice_param->WPInfo.luma_weight_l0_flag[i] = sh-
> > >sh_pred_weight_table.luma_weight_l0_flag[i];
> > + slice_param->WPInfo.chroma_weight_l0_flag[i] = sh-
> > >sh_pred_weight_table.chroma_weight_l0_flag[i];
> > + slice_param->WPInfo.delta_luma_weight_l0[i] = sh-
> > >sh_pred_weight_table.delta_luma_weight_l0[i];
> > + slice_param->WPInfo.luma_offset_l0[i] = sh-
> > >sh_pred_weight_table.luma_offset_l0[i];
> > + slice_param->WPInfo.luma_weight_l1_flag[i] = sh-
> > >sh_pred_weight_table.luma_weight_l1_flag[i];
> > + slice_param->WPInfo.chroma_weight_l1_flag[i] = sh-
> > >sh_pred_weight_table.chroma_weight_l1_flag[i];
> > + slice_param->WPInfo.delta_luma_weight_l1[i] = sh-
> > >sh_pred_weight_table.delta_luma_weight_l1[i];
> > + slice_param->WPInfo.luma_offset_l1[i] = sh-
> > >sh_pred_weight_table.luma_offset_l1[i];
> > + }
> > +
> > + for (i = 0; i < 15; i++) {
> > + for (int j = 0; j < 2; j++) {
> > + slice_param->WPInfo.delta_chroma_weight_l0[i][j] = sh-
> > >sh_pred_weight_table.delta_chroma_weight_l0[i][j];
> > + slice_param->WPInfo.delta_chroma_offset_l0[i][j] = sh-
> > >sh_pred_weight_table.delta_chroma_offset_l0[i][j];
> > + slice_param->WPInfo.delta_chroma_weight_l1[i][j] = sh-
> > >sh_pred_weight_table.delta_chroma_weight_l1[i][j];
> > + slice_param->WPInfo.delta_chroma_offset_l1[i][j] = sh-
> > >sh_pred_weight_table.delta_chroma_offset_l1[i][j];
> > + }
> > + }
> > +
> > + err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic,
> > + &pic->slice_param,
> > +
> > sizeof(VASliceParameterBufferVVC),
> > + buffer, size);
> > + if (err) {
> > + ff_vaapi_decode_cancel(avctx, &pic->pic);
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int vaapi_vvc_end_frame(AVCodecContext *avctx)
> > +{
> > +
> > + const VVCContext *h = avctx->priv_data;
> > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h-
> > >nb_fcs) % h->nb_fcs];
> > + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private;
> > + int ret;
> > +
> > + ret = ff_vaapi_decode_issue(avctx, &pic->pic);
> > + if (ret < 0)
> > + goto fail;
> > +
> > + pic->decode_issued = 1;
> > +
> > + return 0;
> > +
> > +fail:
> > + ff_vaapi_decode_cancel(avctx, &pic->pic);
> > + return ret;
> > +}
> > +
> > +const FFHWAccel ff_vvc_vaapi_hwaccel = {
> > + .p.name = "vvc_vaapi",
> > + .p.type = AVMEDIA_TYPE_VIDEO,
> > + .p.id = AV_CODEC_ID_VVC,
> > + .p.pix_fmt = AV_PIX_FMT_VAAPI,
> > + .start_frame = &vaapi_vvc_start_frame,
> > + .end_frame = &vaapi_vvc_end_frame,
> > + .decode_slice = &vaapi_vvc_decode_slice,
> > + .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC),
> > + .init = &ff_vaapi_decode_init,
> > + .uninit = &ff_vaapi_decode_uninit,
> > + .frame_params = &ff_vaapi_common_frame_params,
> > + .priv_data_size = sizeof(VAAPIDecodeContext),
> > + .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
> > +};
> > diff --git a/libavcodec/version.h b/libavcodec/version.h
> > index 06631ffa8c..7aa95fc3f1 100644
> > --- a/libavcodec/version.h
> > +++ b/libavcodec/version.h
> > @@ -29,7 +29,7 @@
> >
> > #include "version_major.h"
> >
> > -#define LIBAVCODEC_VERSION_MINOR 4
> > +#define LIBAVCODEC_VERSION_MINOR 5
> > #define LIBAVCODEC_VERSION_MICRO 100
> >
> > #define LIBAVCODEC_VERSION_INT
> > AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
> > index f2e269ce76..b204a0c73a 100644
> > --- a/libavcodec/vvc/vvcdec.c
> > +++ b/libavcodec/vvc/vvcdec.c
> > @@ -29,6 +29,7 @@
> > #include "libavutil/cpu.h"
> > #include "libavutil/thread.h"
> >
> > +#include "config_components.h"
> > #include "vvcdec.h"
> > #include "vvc_ctu.h"
> > #include "vvc_data.h"
> > @@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc,
> > VVCContext *s, VVCFrameContext *fc,
> >
> > static enum AVPixelFormat get_format(AVCodecContext *avctx, const
> > VVCSPS *sps)
> > {
> > -#define HWACCEL_MAX 0
> > +#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL
> >
> > enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
> >
> > switch (sps->pix_fmt) {
> > case AV_PIX_FMT_YUV420P:
> > +#if CONFIG_VVC_VAAPI_HWACCEL
> > + *fmt++ = AV_PIX_FMT_VAAPI;
> > +#endif
> > break;
> > case AV_PIX_FMT_YUV420P10:
> > +#if CONFIG_VVC_VAAPI_HWACCEL
> > + *fmt++ = AV_PIX_FMT_VAAPI;
> > +#endif
> > break;
> > }
> >
> > @@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = {
> > .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING |
> > FF_CODEC_CAP_INIT_CLEANUP |
> > FF_CODEC_CAP_AUTO_THREADS,
> > .p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles),
> > + .hw_configs = (const AVCodecHWConfigInternal *const []) {
> > +#if CONFIG_VVC_VAAPI_HWACCEL
> > + HWACCEL_VAAPI(vvc),
> > +#endif
> > + NULL
> > + },
> > };
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder
2024-04-03 3:31 ` Wang, Fei W
@ 2024-04-06 5:03 ` Nuo Mi
0 siblings, 0 replies; 14+ messages in thread
From: Nuo Mi @ 2024-04-06 5:03 UTC (permalink / raw)
To: Wang, Fei W; +Cc: ffmpeg-devel
>
> > > --- a/libavcodec/vaapi_decode.c
> > > +++ b/libavcodec/vaapi_decode.c
> > > @@ -455,6 +455,9 @@ static const struct {
> > > MAP(AV1, AV1_MAIN, AV1Profile0),
> > > MAP(AV1, AV1_HIGH, AV1Profile1),
> > > #endif
> > > +#if VA_CHECK_VERSION(1, 22, 0)
> > > + MAP(H266, VVC_MAIN_10, VVCMain10),
> > > +#endif
> > >
> > > #undef MAP
> > > };
> > > @@ -627,6 +630,10 @@ static int
> > > vaapi_decode_make_config(AVCodecContext *avctx,
> > > case AV_CODEC_ID_VP8:
> > > frames->initial_pool_size += 3;
> > > break;
> > > + case AV_CODEC_ID_H266:
> > > + // Add additional 16 for maximum 16 frames delay in
> > > vvc native decode.
> > > + frames->initial_pool_size += 32;
> >
> > > One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames
> > > amount to approximately 6GB. Can we dynamically allocate the buffer
> > > pool?
>
> It's being handled in another thread:
> https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=11316
>
> >
> > The software decoder requires a delay of 16 frames to ensure full
> > utilization of CPUs. In the future, we may consider increasing this
> > to 32 or even 64 frames.
> > However, for hardware decoding, given that all processing occurs on
> > the GPU, we do not require any delay.
>
> The delay avoids having to sync a hardware task immediately after it is
> submitted, which would otherwise make the hardware switch tasks frequently
> and hurt performance. If the number is going to increase, I'd prefer to make
> it an option and use a different default value for hardware than for software.
Why does VVC require such a large frame pool while other hardware codecs do
not?
What makes VVC so special?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2024-04-06 5:04 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com
2024-03-28 2:04 ` Andreas Rheinhardt
2024-04-02 6:24 ` Wang, Fei W
2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com
2024-04-02 12:48 ` Nuo Mi
2024-04-03 3:31 ` Wang, Fei W
2024-04-06 5:03 ` Nuo Mi
2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson
2024-04-02 6:16 ` Wang, Fei W
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git