* [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically @ 2024-03-28 1:26 fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com ` (6 more replies) 0 siblings, 7 replies; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++------- libavcodec/vaapi_decode.h | 7 ++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index cca94b5336..1b1972a2a9 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -38,12 +38,23 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, { VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data; VAStatus vas; - VABufferID buffer; - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS); + av_assert0(pic->nb_param_buffers <= pic->param_allocated); + if (pic->nb_param_buffers == pic->param_allocated) { + pic->param_buffers = + av_realloc_array(pic->param_buffers, + pic->param_allocated + 16, + sizeof(*pic->param_buffers)); + if (!pic->param_buffers) + return AVERROR(ENOMEM); + + pic->param_allocated += 16; + } + av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated); vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, - type, size, 1, (void*)data, &buffer); + type, size, 1, (void*)data, + &pic->param_buffers[pic->nb_param_buffers]); if (vas != VA_STATUS_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to create parameter " "buffer (type %d): %d (%s).\n", @@ -51,14 +62,14 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, return AVERROR(EIO); } - pic->param_buffers[pic->nb_param_buffers++] = buffer; - av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes) " - "is 
%#x.\n", type, size, buffer); + "is %#x.\n", type, size, pic->param_buffers[pic->nb_param_buffers]); + + ++pic->nb_param_buffers; + return 0; } - int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx, VAAPIDecodePicture *pic, const void *params_data, @@ -215,6 +226,8 @@ fail: fail_at_end: exit: pic->nb_param_buffers = 0; + pic->param_allocated = 0; + av_freep(&pic->param_buffers); pic->nb_slices = 0; pic->slices_allocated = 0; av_freep(&pic->slice_buffers); @@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext *avctx, ff_vaapi_decode_destroy_buffers(avctx, pic); pic->nb_param_buffers = 0; + pic->param_allocated = 0; + av_freep(&pic->param_buffers); pic->nb_slices = 0; pic->slices_allocated = 0; av_freep(&pic->slice_buffers); diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h index 6beda14e52..a41d7ff2ff 100644 --- a/libavcodec/vaapi_decode.h +++ b/libavcodec/vaapi_decode.h @@ -32,15 +32,12 @@ static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic) return (uintptr_t)pic->data[3]; } -enum { - MAX_PARAM_BUFFERS = 16, -}; - typedef struct VAAPIDecodePicture { VASurfaceID output_surface; int nb_param_buffers; - VABufferID param_buffers[MAX_PARAM_BUFFERS]; + VABufferID *param_buffers; + int param_allocated; int nb_slices; VABufferID *slice_buffers; -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com ` (5 subsequent siblings) 6 siblings, 0 replies; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> So that hardware decoder can use the flags too. Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/vvc/vvc_refs.c | 4 ---- libavcodec/vvc/vvc_refs.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c index afcfc09da7..bf70777550 100644 --- a/libavcodec/vvc/vvc_refs.c +++ b/libavcodec/vvc/vvc_refs.c @@ -28,10 +28,6 @@ #include "vvc_refs.h" -#define VVC_FRAME_FLAG_OUTPUT (1 << 0) -#define VVC_FRAME_FLAG_SHORT_REF (1 << 1) -#define VVC_FRAME_FLAG_LONG_REF (1 << 2) -#define VVC_FRAME_FLAG_BUMPING (1 << 3) typedef struct FrameProgress { atomic_int progress[VVC_PROGRESS_LAST]; diff --git a/libavcodec/vvc/vvc_refs.h b/libavcodec/vvc/vvc_refs.h index eba4422fb4..509fc6af22 100644 --- a/libavcodec/vvc/vvc_refs.h +++ b/libavcodec/vvc/vvc_refs.h @@ -25,6 +25,11 @@ #include "vvcdec.h" +#define VVC_FRAME_FLAG_OUTPUT (1 << 0) +#define VVC_FRAME_FLAG_SHORT_REF (1 << 1) +#define VVC_FRAME_FLAG_LONG_REF (1 << 2) +#define VVC_FRAME_FLAG_BUMPING (1 << 3) + int ff_vvc_output_frame(VVCContext *s, VVCFrameContext *fc, struct AVFrame *out, int no_output_of_prior_pics_flag, int flush); void ff_vvc_bump_frame(VVCContext *s, VVCFrameContext *fc); int ff_vvc_set_new_ref(VVCContext *s, VVCFrameContext *fc, struct AVFrame **frame); -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list 
ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com ` (4 subsequent siblings) 6 siblings, 0 replies; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/cbs_h266.h | 1 + libavcodec/cbs_h266_syntax_template.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h index 73d94157d4..19f83aeb49 100644 --- a/libavcodec/cbs_h266.h +++ b/libavcodec/cbs_h266.h @@ -593,6 +593,7 @@ typedef struct H266RawPPS { uint16_t sub_pic_id_val[VVC_MAX_SLICES]; uint16_t col_width_val[VVC_MAX_TILE_COLUMNS]; uint16_t row_height_val[VVC_MAX_TILE_ROWS]; + uint16_t slice_top_left_tile_idx[VVC_MAX_SLICES]; } H266RawPPS; typedef struct H266RawAPS { diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c index 0aae9fdfd5..12f821b3fd 100644 --- a/libavcodec/cbs_h266_syntax_template.c +++ b/libavcodec/cbs_h266_syntax_template.c @@ -1945,6 +1945,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, else infer(pps_tile_idx_delta_present_flag, 0); for (i = 0; i < current->pps_num_slices_in_pic_minus1; i++) { + current->slice_top_left_tile_idx[i] = tile_idx; tile_x = tile_idx % current->num_tile_columns; tile_y = tile_idx / current->num_tile_columns; if (tile_x != current->num_tile_columns - 1) { @@ -2027,6 +2028,8 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, } num_slices_in_tile 
= j; } + for (int k = 0; k < num_slices_in_tile; k++) + current->slice_top_left_tile_idx[i + k] = tile_idx; i += num_slices_in_tile - 1; } else { uint16_t height = 0; @@ -2070,6 +2073,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, if (i == current->pps_num_slices_in_pic_minus1) { uint16_t height = 0; + current->slice_top_left_tile_idx[i] = tile_idx; tile_x = tile_idx % current->num_tile_columns; tile_y = tile_idx / current->num_tile_columns; if (tile_y >= current->num_tile_rows) -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile to H266RawPPS 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com ` (3 subsequent siblings) 6 siblings, 0 replies; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/cbs_h266.h | 1 + libavcodec/cbs_h266_syntax_template.c | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h index 19f83aeb49..87bd39c6f5 100644 --- a/libavcodec/cbs_h266.h +++ b/libavcodec/cbs_h266.h @@ -594,6 +594,7 @@ typedef struct H266RawPPS { uint16_t col_width_val[VVC_MAX_TILE_COLUMNS]; uint16_t row_height_val[VVC_MAX_TILE_ROWS]; uint16_t slice_top_left_tile_idx[VVC_MAX_SLICES]; + uint16_t num_slices_in_tile[VVC_MAX_SLICES]; } H266RawPPS; typedef struct H266RawAPS { diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c index 12f821b3fd..3f378d199e 100644 --- a/libavcodec/cbs_h266_syntax_template.c +++ b/libavcodec/cbs_h266_syntax_template.c @@ -1976,14 +1976,13 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, if (current->pps_slice_width_in_tiles_minus1[i] == 0 && current->pps_slice_height_in_tiles_minus1[i] == 0 && current->row_height_val[tile_y] > 1) { - int num_slices_in_tile, - uniform_slice_height, remaining_height_in_ctbs_y; + int 
uniform_slice_height, remaining_height_in_ctbs_y; remaining_height_in_ctbs_y = current->row_height_val[tile_y]; ues(pps_num_exp_slices_in_tile[i], 0, current->row_height_val[tile_y] - 1, 1, i); if (current->pps_num_exp_slices_in_tile[i] == 0) { - num_slices_in_tile = 1; + current->num_slices_in_tile[i] = 1; current->slice_height_in_ctus[i] = current->row_height_val[tile_y]; slice_top_left_ctu_x[i] = ctu_x; slice_top_left_ctu_y[i] = ctu_y; @@ -2026,14 +2025,18 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, slice_top_left_ctu_y[i + j] = ctu_y; j++; } - num_slices_in_tile = j; + current->num_slices_in_tile[i] = j; } - for (int k = 0; k < num_slices_in_tile; k++) + for (int k = 0; k < current->num_slices_in_tile[i]; k++) current->slice_top_left_tile_idx[i + k] = tile_idx; - i += num_slices_in_tile - 1; + i += current->num_slices_in_tile[i] - 1; } else { uint16_t height = 0; infer(pps_num_exp_slices_in_tile[i], 0); + if (current->pps_slice_width_in_tiles_minus1[i] == 0 && + current->pps_slice_height_in_tiles_minus1[i] == 0) + current->num_slices_in_tile[i] = 1; + for (j = 0; j <= current->pps_slice_height_in_tiles_minus1[i]; j++) { @@ -2074,6 +2077,7 @@ static int FUNC(pps) (CodedBitstreamContext *ctx, RWContext *rw, uint16_t height = 0; current->slice_top_left_tile_idx[i] = tile_idx; + current->num_slices_in_tile[i] = 1; tile_x = tile_idx % current->num_tile_columns; tile_y = tile_idx / current->num_tile_columns; if (tile_y >= current->num_tile_rows) -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com ` (2 preceding siblings ...) 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com ` (2 subsequent siblings) 6 siblings, 0 replies; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/vvc/vvc_ps.c | 10 +++++++++- libavcodec/vvc/vvc_ps.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libavcodec/vvc/vvc_ps.c b/libavcodec/vvc/vvc_ps.c index 7972803da6..97eef85be9 100644 --- a/libavcodec/vvc/vvc_ps.c +++ b/libavcodec/vvc/vvc_ps.c @@ -873,13 +873,21 @@ static void alf_derive(VVCALF *alf, const H266RawAPS *aps) alf_cc(alf, aps); } +static void alf_free(FFRefStructOpaque unused, void *obj) +{ + VVCALF *alf = obj; + + ff_refstruct_unref(&alf->r); +} + static int aps_decode_alf(const VVCALF **alf, const H266RawAPS *aps) { - VVCALF *a = ff_refstruct_allocz(sizeof(*a)); + VVCALF *a = ff_refstruct_alloc_ext(sizeof(*a), 0, NULL, alf_free); if (!a) return AVERROR(ENOMEM); alf_derive(a, aps); + ff_refstruct_replace(&a->r, aps); ff_refstruct_replace(alf, a); ff_refstruct_unref(&a); diff --git a/libavcodec/vvc/vvc_ps.h b/libavcodec/vvc/vvc_ps.h index 1164d0eab6..d306e0354a 100644 --- a/libavcodec/vvc/vvc_ps.h +++ b/libavcodec/vvc/vvc_ps.h @@ -159,6 +159,7 @@ typedef struct VVCPH { #define ALF_NUM_COEFF_CC 7 typedef struct VVCALF { + const H266RawAPS *r; int16_t luma_coeff [ALF_NUM_FILTERS_LUMA][ALF_NUM_COEFF_LUMA]; uint8_t luma_clip_idx [ALF_NUM_FILTERS_LUMA][ALF_NUM_COEFF_LUMA]; -- 2.25.1 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com ` (3 preceding siblings ...) 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-03-28 2:04 ` Andreas Rheinhardt 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com 2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson 6 siblings, 1 reply; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- libavcodec/vvc/vvc_refs.c | 6 ++++ libavcodec/vvc/vvcdec.c | 67 +++++++++++++++++++++++++++++++++++---- libavcodec/vvc/vvcdec.h | 5 +++ 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c index bf70777550..c9f89a5a0a 100644 --- a/libavcodec/vvc/vvc_refs.c +++ b/libavcodec/vvc/vvc_refs.c @@ -25,6 +25,7 @@ #include "libavutil/thread.h" #include "libavcodec/refstruct.h" #include "libavcodec/thread.h" +#include "libavcodec/decode.h" #include "vvc_refs.h" @@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags) ff_refstruct_unref(&frame->rpl_tab); frame->collocated_ref = NULL; + ff_refstruct_unref(&frame->hwaccel_picture_private); } } @@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc) if (!frame->progress) goto fail; + ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame->hwaccel_picture_private); + if (ret < 0) + goto fail; + return frame; fail: ff_vvc_unref_frame(fc, frame, ~0); diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c index d5704aca25..f2e269ce76 100644 --- 
a/libavcodec/vvc/vvcdec.c +++ b/libavcodec/vvc/vvcdec.c @@ -24,6 +24,8 @@ #include "libavcodec/decode.h" #include "libavcodec/profiles.h" #include "libavcodec/refstruct.h" +#include "libavcodec/hwconfig.h" +#include "libavcodec/hwaccel_internal.h" #include "libavutil/cpu.h" #include "libavutil/thread.h" @@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src) ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab); ff_refstruct_replace(&dst->rpl, src->rpl); + ff_refstruct_replace(&dst->hwaccel_picture_private, + src->hwaccel_picture_private); dst->nb_rpl_elems = src->nb_rpl_elems; dst->poc = src->poc; @@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc, return 0; } +static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps) +{ +#define HWACCEL_MAX 0 + + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; + + switch (sps->pix_fmt) { + case AV_PIX_FMT_YUV420P: + break; + case AV_PIX_FMT_YUV420P10: + break; + } + + *fmt++ = sps->pix_fmt; + *fmt = AV_PIX_FMT_NONE; + + return ff_get_format(avctx, pix_fmts); +} + static void export_frame_params(VVCContext *s, const VVCFrameContext *fc) { AVCodecContext *c = s->avctx; const VVCSPS *sps = fc->ps.sps; const VVCPPS *pps = fc->ps.pps; - c->pix_fmt = sps->pix_fmt; - c->coded_width = pps->width; - c->coded_height = pps->height; - c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]); - c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]); + // Reset HW config if pix_fmt/w/h change. 
+ if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width || c->coded_height != pps->height) { + c->coded_width = pps->width; + c->coded_height = pps->height; + c->pix_fmt = get_format(c, sps); + s->pix_fmt = sps->pix_fmt; + } + + c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]); + c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]); } static int frame_setup(VVCFrameContext *fc, VVCContext *s) @@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s, VVCFrameContext *fc, const H2645NAL *nal, ret = slice_init_entry_points(sc, fc, nal, unit); if (ret < 0) return ret; + + if (s->avctx->hwaccel) { + if (is_first_slice) { + ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0); + if (ret < 0) + return ret; + } + + ret = FF_HW_CALL(s->avctx, decode_slice, + nal->raw_data, nal->raw_size); + if (ret < 0) + return ret; + } + fc->nb_slices++; return 0; @@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output) static int submit_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *output, int *got_output) { int ret; + + if (s->avctx->hwaccel) { + if (ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame) < 0) { + av_log(s->avctx, AV_LOG_ERROR, + "Hardware accelerator failed to decode picture\n"); + ff_vvc_unref_frame(fc, fc->ref, ~0); + return ret; + } + } else + ff_vvc_frame_submit(s, fc); + s->nb_frames++; s->nb_delayed++; - ff_vvc_frame_submit(s, fc); + if (s->nb_delayed >= s->nb_fcs) { if ((ret = wait_delayed_frame(s, output, got_output)) < 0) return ret; @@ -1027,6 +1080,8 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx) GDR_SET_RECOVERED(s); ff_thread_once(&init_static_once, init_default_scale_m); + s->pix_fmt = AV_PIX_FMT_NONE; + return 0; } diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h index aa3d715524..009d57424e 100644 --- a/libavcodec/vvc/vvcdec.h +++ 
b/libavcodec/vvc/vvcdec.h @@ -78,6 +78,9 @@ typedef struct VVCFrame { * A combination of VVC_FRAME_FLAG_* */ uint8_t flags; + + AVBufferRef *hwaccel_priv_buf; + void *hwaccel_picture_private; ///< hardware accelerator private data } VVCFrame; typedef struct SliceContext { @@ -224,6 +227,8 @@ typedef struct VVCContext { uint64_t nb_frames; ///< processed frames int nb_delayed; ///< delayed frames + + enum AVPixelFormat pix_fmt; ///< pix format of current frame } VVCContext ; #endif /* AVCODEC_VVC_VVCDEC_H */ -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com @ 2024-03-28 2:04 ` Andreas Rheinhardt 2024-04-02 6:24 ` Wang, Fei W 0 siblings, 1 reply; 14+ messages in thread From: Andreas Rheinhardt @ 2024-03-28 2:04 UTC (permalink / raw) To: ffmpeg-devel fei.w.wang-at-intel.com@ffmpeg.org: > From: Fei Wang <fei.w.wang@intel.com> > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > --- > libavcodec/vvc/vvc_refs.c | 6 ++++ > libavcodec/vvc/vvcdec.c | 67 +++++++++++++++++++++++++++++++++++---- > libavcodec/vvc/vvcdec.h | 5 +++ > 3 files changed, 72 insertions(+), 6 deletions(-) > > diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c > index bf70777550..c9f89a5a0a 100644 > --- a/libavcodec/vvc/vvc_refs.c > +++ b/libavcodec/vvc/vvc_refs.c > @@ -25,6 +25,7 @@ > #include "libavutil/thread.h" > #include "libavcodec/refstruct.h" > #include "libavcodec/thread.h" > +#include "libavcodec/decode.h" > > #include "vvc_refs.h" > > @@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags) > ff_refstruct_unref(&frame->rpl_tab); > > frame->collocated_ref = NULL; > + ff_refstruct_unref(&frame->hwaccel_picture_private); > } > } > > @@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc) > if (!frame->progress) > goto fail; > > + ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame->hwaccel_picture_private); > + if (ret < 0) > + goto fail; > + > return frame; > fail: > ff_vvc_unref_frame(fc, frame, ~0); > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c > index d5704aca25..f2e269ce76 100644 > --- a/libavcodec/vvc/vvcdec.c > +++ b/libavcodec/vvc/vvcdec.c > @@ -24,6 +24,8 @@ > #include "libavcodec/decode.h" > #include "libavcodec/profiles.h" > #include "libavcodec/refstruct.h" > +#include "libavcodec/hwconfig.h" > +#include "libavcodec/hwaccel_internal.h" > #include 
"libavutil/cpu.h" > #include "libavutil/thread.h" > > @@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src) > > ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab); > ff_refstruct_replace(&dst->rpl, src->rpl); > + ff_refstruct_replace(&dst->hwaccel_picture_private, > + src->hwaccel_picture_private); > dst->nb_rpl_elems = src->nb_rpl_elems; > > dst->poc = src->poc; > @@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc, > return 0; > } > > +static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps) > +{ > +#define HWACCEL_MAX 0 > + > + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; > + > + switch (sps->pix_fmt) { > + case AV_PIX_FMT_YUV420P: > + break; > + case AV_PIX_FMT_YUV420P10: > + break; > + } > + > + *fmt++ = sps->pix_fmt; > + *fmt = AV_PIX_FMT_NONE; > + > + return ff_get_format(avctx, pix_fmts); > +} > + > static void export_frame_params(VVCContext *s, const VVCFrameContext *fc) > { > AVCodecContext *c = s->avctx; > const VVCSPS *sps = fc->ps.sps; > const VVCPPS *pps = fc->ps.pps; > > - c->pix_fmt = sps->pix_fmt; > - c->coded_width = pps->width; > - c->coded_height = pps->height; > - c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]); > - c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]); > + // Reset HW config if pix_fmt/w/h change. 
> + if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width || c->coded_height != pps->height) { > + c->coded_width = pps->width; > + c->coded_height = pps->height; > + c->pix_fmt = get_format(c, sps); > + s->pix_fmt = sps->pix_fmt; > + } > + > + c->width = pps->width - ((pps->r->pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]); > + c->height = pps->height - ((pps->r->pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]); > } > > static int frame_setup(VVCFrameContext *fc, VVCContext *s) > @@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s, VVCFrameContext *fc, const H2645NAL *nal, > ret = slice_init_entry_points(sc, fc, nal, unit); > if (ret < 0) > return ret; > + > + if (s->avctx->hwaccel) { > + if (is_first_slice) { > + ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0); > + if (ret < 0) > + return ret; > + } > + > + ret = FF_HW_CALL(s->avctx, decode_slice, > + nal->raw_data, nal->raw_size); > + if (ret < 0) > + return ret; > + } > + > fc->nb_slices++; > > return 0; > @@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output) > static int submit_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *output, int *got_output) > { > int ret; > + > + if (s->avctx->hwaccel) { > + if (ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame) < 0) { > + av_log(s->avctx, AV_LOG_ERROR, > + "Hardware accelerator failed to decode picture\n"); > + ff_vvc_unref_frame(fc, fc->ref, ~0); > + return ret; > + } > + } else > + ff_vvc_frame_submit(s, fc); > + > s->nb_frames++; > s->nb_delayed++; > - ff_vvc_frame_submit(s, fc); > + > if (s->nb_delayed >= s->nb_fcs) { > if ((ret = wait_delayed_frame(s, output, got_output)) < 0) > return ret; > @@ -1027,6 +1080,8 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx) > GDR_SET_RECOVERED(s); > ff_thread_once(&init_static_once, init_default_scale_m); > > + s->pix_fmt = AV_PIX_FMT_NONE; > + > return 0; > } > > diff --git 
a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h > index aa3d715524..009d57424e 100644 > --- a/libavcodec/vvc/vvcdec.h > +++ b/libavcodec/vvc/vvcdec.h > @@ -78,6 +78,9 @@ typedef struct VVCFrame { > * A combination of VVC_FRAME_FLAG_* > */ > uint8_t flags; > + > + AVBufferRef *hwaccel_priv_buf; Seems unused (hwaccel_picture_private uses RefStruct nowadays; no other decoder uses the AVBuffer API for this any more). > + void *hwaccel_picture_private; ///< hardware accelerator private data > } VVCFrame; > > typedef struct SliceContext { > @@ -224,6 +227,8 @@ typedef struct VVCContext { > > uint64_t nb_frames; ///< processed frames > int nb_delayed; ///< delayed frames > + > + enum AVPixelFormat pix_fmt; ///< pix format of current frame > } VVCContext ; > > #endif /* AVCODEC_VVC_VVCDEC_H */ _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API 2024-03-28 2:04 ` Andreas Rheinhardt @ 2024-04-02 6:24 ` Wang, Fei W 0 siblings, 0 replies; 14+ messages in thread From: Wang, Fei W @ 2024-04-02 6:24 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Wang, Fei W On Thu, 2024-03-28 at 03:04 +0100, Andreas Rheinhardt wrote: > fei.w.wang-at-intel.com@ffmpeg.org: > > From: Fei Wang <fei.w.wang@intel.com> > > > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > > --- > > libavcodec/vvc/vvc_refs.c | 6 ++++ > > libavcodec/vvc/vvcdec.c | 67 > > +++++++++++++++++++++++++++++++++++---- > > libavcodec/vvc/vvcdec.h | 5 +++ > > 3 files changed, 72 insertions(+), 6 deletions(-) > > > > diff --git a/libavcodec/vvc/vvc_refs.c b/libavcodec/vvc/vvc_refs.c > > index bf70777550..c9f89a5a0a 100644 > > --- a/libavcodec/vvc/vvc_refs.c > > +++ b/libavcodec/vvc/vvc_refs.c > > @@ -25,6 +25,7 @@ > > #include "libavutil/thread.h" > > #include "libavcodec/refstruct.h" > > #include "libavcodec/thread.h" > > +#include "libavcodec/decode.h" > > > > #include "vvc_refs.h" > > > > @@ -56,6 +57,7 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, > > VVCFrame *frame, int flags) > > ff_refstruct_unref(&frame->rpl_tab); > > > > frame->collocated_ref = NULL; > > + ff_refstruct_unref(&frame->hwaccel_picture_private); > > } > > } > > > > @@ -138,6 +140,10 @@ static VVCFrame *alloc_frame(VVCContext *s, > > VVCFrameContext *fc) > > if (!frame->progress) > > goto fail; > > > > + ret = ff_hwaccel_frame_priv_alloc(s->avctx, &frame- > > >hwaccel_picture_private); > > + if (ret < 0) > > + goto fail; > > + > > return frame; > > fail: > > ff_vvc_unref_frame(fc, frame, ~0); > > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c > > index d5704aca25..f2e269ce76 100644 > > --- a/libavcodec/vvc/vvcdec.c > > +++ b/libavcodec/vvc/vvcdec.c > > @@ -24,6 +24,8 @@ > > #include "libavcodec/decode.h" > > #include "libavcodec/profiles.h" > > #include "libavcodec/refstruct.h" > > +#include 
"libavcodec/hwconfig.h" > > +#include "libavcodec/hwaccel_internal.h" > > #include "libavutil/cpu.h" > > #include "libavutil/thread.h" > > > > @@ -563,6 +565,8 @@ static int ref_frame(VVCFrame *dst, const > > VVCFrame *src) > > > > ff_refstruct_replace(&dst->rpl_tab, src->rpl_tab); > > ff_refstruct_replace(&dst->rpl, src->rpl); > > + ff_refstruct_replace(&dst->hwaccel_picture_private, > > + src->hwaccel_picture_private); > > dst->nb_rpl_elems = src->nb_rpl_elems; > > > > dst->poc = src->poc; > > @@ -718,17 +722,41 @@ static int slice_start(SliceContext *sc, > > VVCContext *s, VVCFrameContext *fc, > > return 0; > > } > > > > +static enum AVPixelFormat get_format(AVCodecContext *avctx, const > > VVCSPS *sps) > > +{ > > +#define HWACCEL_MAX 0 > > + > > + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; > > + > > + switch (sps->pix_fmt) { > > + case AV_PIX_FMT_YUV420P: > > + break; > > + case AV_PIX_FMT_YUV420P10: > > + break; > > + } > > + > > + *fmt++ = sps->pix_fmt; > > + *fmt = AV_PIX_FMT_NONE; > > + > > + return ff_get_format(avctx, pix_fmts); > > +} > > + > > static void export_frame_params(VVCContext *s, const > > VVCFrameContext *fc) > > { > > AVCodecContext *c = s->avctx; > > const VVCSPS *sps = fc->ps.sps; > > const VVCPPS *pps = fc->ps.pps; > > > > - c->pix_fmt = sps->pix_fmt; > > - c->coded_width = pps->width; > > - c->coded_height = pps->height; > > - c->width = pps->width - ((pps->r- > > >pps_conf_win_left_offset + pps->r->pps_conf_win_right_offset) << > > sps->hshift[CHROMA]); > > - c->height = pps->height - ((pps->r- > > >pps_conf_win_top_offset + pps->r->pps_conf_win_bottom_offset) << > > sps->vshift[CHROMA]); > > + // Reset HW config if pix_fmt/w/h change. 
> > + if (s->pix_fmt != sps->pix_fmt || c->coded_width != pps->width > > || c->coded_height != pps->height) { > > + c->coded_width = pps->width; > > + c->coded_height = pps->height; > > + c->pix_fmt = get_format(c, sps); > > + s->pix_fmt = sps->pix_fmt; > > + } > > + > > + c->width = pps->width - ((pps->r->pps_conf_win_left_offset + > > pps->r->pps_conf_win_right_offset) << sps->hshift[CHROMA]); > > + c->height = pps->height - ((pps->r->pps_conf_win_top_offset + > > pps->r->pps_conf_win_bottom_offset) << sps->vshift[CHROMA]); > > } > > > > static int frame_setup(VVCFrameContext *fc, VVCContext *s) > > @@ -771,6 +799,20 @@ static int decode_slice(VVCContext *s, > > VVCFrameContext *fc, const H2645NAL *nal, > > ret = slice_init_entry_points(sc, fc, nal, unit); > > if (ret < 0) > > return ret; > > + > > + if (s->avctx->hwaccel) { > > + if (is_first_slice) { > > + ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0); > > + if (ret < 0) > > + return ret; > > + } > > + > > + ret = FF_HW_CALL(s->avctx, decode_slice, > > + nal->raw_data, nal->raw_size); > > + if (ret < 0) > > + return ret; > > + } > > + > > fc->nb_slices++; > > > > return 0; > > @@ -885,9 +927,20 @@ static int wait_delayed_frame(VVCContext *s, > > AVFrame *output, int *got_output) > > static int submit_frame(VVCContext *s, VVCFrameContext *fc, > > AVFrame *output, int *got_output) > > { > > int ret; > > + > > + if (s->avctx->hwaccel) { > > + if (ret = FF_HW_SIMPLE_CALL(s->avctx, end_frame) < 0) { > > + av_log(s->avctx, AV_LOG_ERROR, > > + "Hardware accelerator failed to decode > > picture\n"); > > + ff_vvc_unref_frame(fc, fc->ref, ~0); > > + return ret; > > + } > > + } else > > + ff_vvc_frame_submit(s, fc); > > + > > s->nb_frames++; > > s->nb_delayed++; > > - ff_vvc_frame_submit(s, fc); > > + > > if (s->nb_delayed >= s->nb_fcs) { > > if ((ret = wait_delayed_frame(s, output, got_output)) < 0) > > return ret; > > @@ -1027,6 +1080,8 @@ static av_cold int > > vvc_decode_init(AVCodecContext *avctx) > > 
GDR_SET_RECOVERED(s); > > ff_thread_once(&init_static_once, init_default_scale_m); > > > > + s->pix_fmt = AV_PIX_FMT_NONE; > > + > > return 0; > > } > > > > diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h > > index aa3d715524..009d57424e 100644 > > --- a/libavcodec/vvc/vvcdec.h > > +++ b/libavcodec/vvc/vvcdec.h > > @@ -78,6 +78,9 @@ typedef struct VVCFrame { > > * A combination of VVC_FRAME_FLAG_* > > */ > > uint8_t flags; > > + > > + AVBufferRef *hwaccel_priv_buf; > > Seems unused (hwaccel_picture_private uses RefStruct nowadays; no other decoder uses the AVBuffer API for this any more). Thanks, will fix in the next version. Fei > > > + void *hwaccel_picture_private; ///< hardware accelerator > > private data > > } VVCFrame; > > > > typedef struct SliceContext { > > @@ -224,6 +227,8 @@ typedef struct VVCContext { > > > > uint64_t nb_frames; ///< processed frames > > int nb_delayed; ///< delayed frames > > + > > + enum AVPixelFormat pix_fmt; ///< pix format of current frame > > } VVCContext ; > > > > #endif /* AVCODEC_VVC_VVCDEC_H */ > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com ` (4 preceding siblings ...) 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com @ 2024-03-28 1:26 ` fei.w.wang-at-intel.com 2024-04-02 12:48 ` Nuo Mi 2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson 6 siblings, 1 reply; 14+ messages in thread From: fei.w.wang-at-intel.com @ 2024-03-28 1:26 UTC (permalink / raw) To: ffmpeg-devel; +Cc: fei.w.wang From: Fei Wang <fei.w.wang@intel.com> Signed-off-by: Fei Wang <fei.w.wang@intel.com> --- Changelog | 4 + configure | 3 + libavcodec/Makefile | 1 + libavcodec/hwaccels.h | 1 + libavcodec/vaapi_decode.c | 7 + libavcodec/vaapi_vvc.c | 657 ++++++++++++++++++++++++++++++++++++++ libavcodec/version.h | 2 +- libavcodec/vvc/vvcdec.c | 15 +- 8 files changed, 688 insertions(+), 2 deletions(-) create mode 100644 libavcodec/vaapi_vvc.c diff --git a/Changelog b/Changelog index e83a00e35c..3108e65558 100644 --- a/Changelog +++ b/Changelog @@ -1,6 +1,10 @@ Entries are sorted chronologically from oldest to youngest within each release, releases are sorted from youngest to oldest. 
+version <next>: +- VVC VAAPI decoder + + version 7.0: - DXV DXT1 encoder - LEAD MCMP decoder diff --git a/configure b/configure index 2a1d22310b..d902c9adc8 100755 --- a/configure +++ b/configure @@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9" vp9_vdpau_hwaccel_select="vp9_decoder" vp9_videotoolbox_hwaccel_deps="videotoolbox" vp9_videotoolbox_hwaccel_select="vp9_decoder" +vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC" +vvc_vaapi_hwaccel_select="vvc_decoder" wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel" wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel" wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel" @@ -7250,6 +7252,7 @@ if enabled vaapi; then check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, 0)" check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC" + check_type "va/va.h va/va_dec_vvc.h" "VAPictureParameterBufferVVC" check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx check_type "va/va.h va/va_vpp.h" "VAProcFilterParameterBufferHDRToneMapping" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9ce6d445c1..343b037636 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o +OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o reverse.o diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 5171e4c7d7..88d6b9a9b5 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -82,6 +82,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel; extern const struct FFHWAccel ff_vp9_vaapi_hwaccel; extern const struct FFHWAccel ff_vp9_vdpau_hwaccel; extern const struct FFHWAccel 
ff_vp9_videotoolbox_hwaccel; +extern const struct FFHWAccel ff_vvc_vaapi_hwaccel; extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel; extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel; extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel; diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index 1b1972a2a9..ceeb1f1a12 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -455,6 +455,9 @@ static const struct { MAP(AV1, AV1_MAIN, AV1Profile0), MAP(AV1, AV1_HIGH, AV1Profile1), #endif +#if VA_CHECK_VERSION(1, 22, 0) + MAP(H266, VVC_MAIN_10, VVCMain10), +#endif #undef MAP }; @@ -627,6 +630,10 @@ static int vaapi_decode_make_config(AVCodecContext *avctx, case AV_CODEC_ID_VP8: frames->initial_pool_size += 3; break; + case AV_CODEC_ID_H266: + // Add additional 16 for maximum 16 frames delay in vvc native decode. + frames->initial_pool_size += 32; + break; default: frames->initial_pool_size += 2; } diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c new file mode 100644 index 0000000000..6141005688 --- /dev/null +++ b/libavcodec/vaapi_vvc.c @@ -0,0 +1,657 @@ +/* + * VVC HW decode acceleration through VA API + * + * Copyright (c) 2024 Intel Corporation + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <va/va.h> +#include <va/va_dec_vvc.h> + +#include "vvc/vvcdec.h" +#include "vvc/vvc_refs.h" +#include "hwaccel_internal.h" +#include "vaapi_decode.h" + +typedef struct VAAPIDecodePictureVVC { + VAAPIDecodePicture pic; + VAPictureParameterBufferVVC pic_param; + VASliceParameterBufferVVC slice_param; + int decode_issued; +} VAAPIDecodePictureVVC; + +static void init_vaapi_pic(VAPictureVVC *va_pic) +{ + va_pic->picture_id = VA_INVALID_ID; + va_pic->flags = VA_PICTURE_VVC_INVALID; + va_pic->pic_order_cnt = 0; +} + +static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame *pic) +{ + va_pic->picture_id = ff_vaapi_get_surface_id(pic->frame); + va_pic->pic_order_cnt = pic->poc; + va_pic->flags = 0; + + if (pic->flags & VVC_FRAME_FLAG_LONG_REF) + va_pic->flags |= VA_PICTURE_VVC_LONG_TERM_REFERENCE; +} + +static void fill_vaapi_reference_frames(const VVCFrameContext *h, VAPictureParameterBufferVVC *pp) +{ + const VVCFrame *current_picture = h->ref; + int i, j; + + for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) { + const VVCFrame *frame = NULL; + + while (!frame && j < FF_ARRAY_ELEMS(h->DPB)) { + if ((&h->DPB[j] != current_picture ) && + (h->DPB[j].flags & (VVC_FRAME_FLAG_LONG_REF | VVC_FRAME_FLAG_SHORT_REF))) + frame = &h->DPB[j]; + j++; + } + + init_vaapi_pic(&pp->ReferenceFrames[i]); + + if (frame) { + VAAPIDecodePictureVVC *pic; + fill_vaapi_pic(&pp->ReferenceFrames[i], frame); + pic = frame->hwaccel_picture_private; + if (!pic->decode_issued) + pp->ReferenceFrames[i].flags |= VA_PICTURE_VVC_UNAVAILABLE_REFERENCE; + } + } +} + +static int vaapi_vvc_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const VVCContext *h = avctx->priv_data; + VVCFrameContext *fc = 
&h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs]; + const H266RawSPS *sps = fc->ps.sps->r; + const H266RawPPS *pps = fc->ps.pps->r; + const H266RawPictureHeader *ph = fc->ps.ph.r; + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; + VAPictureParameterBufferVVC *pic_param = &pic->pic_param; + uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] = {0}; + int i, j, k, err; + + pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref->frame); + + *pic_param = (VAPictureParameterBufferVVC) { + .pps_pic_width_in_luma_samples = pps->pps_pic_width_in_luma_samples, + .pps_pic_height_in_luma_samples = pps->pps_pic_height_in_luma_samples, + .sps_num_subpics_minus1 = sps->sps_num_subpics_minus1, + .sps_chroma_format_idc = sps->sps_chroma_format_idc, + .sps_bitdepth_minus8 = sps->sps_bitdepth_minus8, + .sps_log2_ctu_size_minus5 = sps->sps_log2_ctu_size_minus5, + .sps_log2_min_luma_coding_block_size_minus2 = sps->sps_log2_min_luma_coding_block_size_minus2, + .sps_log2_transform_skip_max_size_minus2 = sps->sps_log2_transform_skip_max_size_minus2, + .sps_six_minus_max_num_merge_cand = sps->sps_six_minus_max_num_merge_cand, + .sps_five_minus_max_num_subblock_merge_cand = sps->sps_five_minus_max_num_subblock_merge_cand, + .sps_max_num_merge_cand_minus_max_num_gpm_cand = sps->sps_max_num_merge_cand_minus_max_num_gpm_cand, + .sps_log2_parallel_merge_level_minus2 = sps->sps_log2_parallel_merge_level_minus2, + .sps_min_qp_prime_ts = sps->sps_min_qp_prime_ts, + .sps_six_minus_max_num_ibc_merge_cand = sps->sps_six_minus_max_num_ibc_merge_cand, + .sps_num_ladf_intervals_minus2 = sps->sps_num_ladf_intervals_minus2, + .sps_ladf_lowest_interval_qp_offset = sps->sps_ladf_lowest_interval_qp_offset, + .sps_flags.bits = { + .sps_subpic_info_present_flag = sps->sps_subpic_info_present_flag, + .sps_independent_subpics_flag = sps->sps_independent_subpics_flag, + .sps_subpic_same_size_flag = sps->sps_subpic_same_size_flag, + .sps_entropy_coding_sync_enabled_flag = 
sps->sps_entropy_coding_sync_enabled_flag, + .sps_qtbtt_dual_tree_intra_flag = sps->sps_qtbtt_dual_tree_intra_flag, + .sps_max_luma_transform_size_64_flag = sps->sps_max_luma_transform_size_64_flag, + .sps_transform_skip_enabled_flag = sps->sps_transform_skip_enabled_flag, + .sps_bdpcm_enabled_flag = sps->sps_bdpcm_enabled_flag, + .sps_mts_enabled_flag = sps->sps_mts_enabled_flag, + .sps_explicit_mts_intra_enabled_flag = sps->sps_explicit_mts_intra_enabled_flag, + .sps_explicit_mts_inter_enabled_flag = sps->sps_explicit_mts_inter_enabled_flag, + .sps_lfnst_enabled_flag = sps->sps_lfnst_enabled_flag, + .sps_joint_cbcr_enabled_flag = sps->sps_joint_cbcr_enabled_flag, + .sps_same_qp_table_for_chroma_flag = sps->sps_same_qp_table_for_chroma_flag, + .sps_sao_enabled_flag = sps->sps_sao_enabled_flag, + .sps_alf_enabled_flag = sps->sps_alf_enabled_flag, + .sps_ccalf_enabled_flag = sps->sps_alf_enabled_flag, + .sps_lmcs_enabled_flag = sps->sps_lmcs_enabled_flag, + .sps_sbtmvp_enabled_flag = sps->sps_sbtmvp_enabled_flag, + .sps_amvr_enabled_flag = sps->sps_amvr_enabled_flag, + .sps_smvd_enabled_flag = sps->sps_smvd_enabled_flag, + .sps_mmvd_enabled_flag = sps->sps_mmvd_enabled_flag, + .sps_sbt_enabled_flag = sps->sps_sbt_enabled_flag, + .sps_affine_enabled_flag = sps->sps_affine_enabled_flag, + .sps_6param_affine_enabled_flag = sps->sps_6param_affine_enabled_flag, + .sps_affine_amvr_enabled_flag = sps->sps_affine_amvr_enabled_flag, + .sps_affine_prof_enabled_flag = sps->sps_affine_prof_enabled_flag, + .sps_bcw_enabled_flag = sps->sps_bcw_enabled_flag, + .sps_ciip_enabled_flag = sps->sps_ciip_enabled_flag, + .sps_gpm_enabled_flag = sps->sps_gpm_enabled_flag, + .sps_isp_enabled_flag = sps->sps_isp_enabled_flag, + .sps_mrl_enabled_flag = sps->sps_mrl_enabled_flag, + .sps_mip_enabled_flag = sps->sps_mip_enabled_flag, + .sps_cclm_enabled_flag = sps->sps_cclm_enabled_flag, + .sps_chroma_horizontal_collocated_flag = sps->sps_chroma_horizontal_collocated_flag, + 
.sps_chroma_vertical_collocated_flag = sps->sps_chroma_vertical_collocated_flag, + .sps_palette_enabled_flag = sps->sps_palette_enabled_flag, + .sps_act_enabled_flag = sps->sps_act_enabled_flag, + .sps_ibc_enabled_flag = sps->sps_ibc_enabled_flag, + .sps_ladf_enabled_flag = sps->sps_ladf_enabled_flag, + .sps_explicit_scaling_list_enabled_flag = sps->sps_explicit_scaling_list_enabled_flag, + .sps_scaling_matrix_for_lfnst_disabled_flag = sps->sps_scaling_matrix_for_lfnst_disabled_flag, + .sps_scaling_matrix_for_alternative_colour_space_disabled_flag = sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag, + .sps_scaling_matrix_designated_colour_space_flag = sps->sps_scaling_matrix_designated_colour_space_flag, + .sps_virtual_boundaries_enabled_flag = sps->sps_virtual_boundaries_enabled_flag, + .sps_virtual_boundaries_present_flag = sps->sps_virtual_boundaries_present_flag, + }, + .NumVerVirtualBoundaries = sps->sps_virtual_boundaries_present_flag ? + sps->sps_num_ver_virtual_boundaries : + ph->ph_num_ver_virtual_boundaries, + .NumHorVirtualBoundaries = sps->sps_virtual_boundaries_present_flag ? 
+ sps->sps_num_hor_virtual_boundaries : + ph->ph_num_hor_virtual_boundaries, + .pps_scaling_win_left_offset = pps->pps_scaling_win_left_offset, + .pps_scaling_win_right_offset = pps->pps_scaling_win_right_offset, + .pps_scaling_win_top_offset = pps->pps_scaling_win_top_offset, + .pps_scaling_win_bottom_offset = pps->pps_scaling_win_bottom_offset, + .pps_num_exp_tile_columns_minus1 = pps->pps_num_exp_tile_columns_minus1, + .pps_num_exp_tile_rows_minus1 = pps->pps_num_exp_tile_rows_minus1, + .pps_num_slices_in_pic_minus1 = pps->pps_num_slices_in_pic_minus1, + .pps_pic_width_minus_wraparound_offset = pps->pps_pic_width_minus_wraparound_offset, + .pps_cb_qp_offset = pps->pps_cb_qp_offset, + .pps_cr_qp_offset = pps->pps_cr_qp_offset, + .pps_joint_cbcr_qp_offset_value = pps->pps_joint_cbcr_qp_offset_value, + .pps_chroma_qp_offset_list_len_minus1 = pps->pps_chroma_qp_offset_list_len_minus1, + .pps_flags.bits = { + .pps_loop_filter_across_tiles_enabled_flag = pps->pps_loop_filter_across_tiles_enabled_flag, + .pps_rect_slice_flag = pps->pps_rect_slice_flag, + .pps_single_slice_per_subpic_flag = pps->pps_single_slice_per_subpic_flag, + .pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag, + .pps_weighted_pred_flag = pps->pps_weighted_pred_flag, + .pps_weighted_bipred_flag = pps->pps_weighted_bipred_flag, + .pps_ref_wraparound_enabled_flag = pps->pps_ref_wraparound_enabled_flag, + .pps_cu_qp_delta_enabled_flag = pps->pps_cu_qp_delta_enabled_flag, + .pps_cu_chroma_qp_offset_list_enabled_flag = pps->pps_cu_chroma_qp_offset_list_enabled_flag, + .pps_deblocking_filter_override_enabled_flag = pps->pps_deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag, + .pps_dbf_info_in_ph_flag = pps->pps_dbf_info_in_ph_flag, + .pps_sao_info_in_ph_flag = pps->pps_sao_info_in_ph_flag, + .pps_alf_info_in_ph_flag = pps->pps_alf_info_in_ph_flag, + }, + .ph_lmcs_aps_id = ph->ph_lmcs_aps_id, 
+ .ph_scaling_list_aps_id = ph->ph_scaling_list_aps_id, + .ph_log2_diff_min_qt_min_cb_intra_slice_luma = ph->ph_log2_diff_min_qt_min_cb_intra_slice_luma, + .ph_max_mtt_hierarchy_depth_intra_slice_luma = ph->ph_max_mtt_hierarchy_depth_intra_slice_luma, + .ph_log2_diff_max_bt_min_qt_intra_slice_luma = ph->ph_log2_diff_max_bt_min_qt_intra_slice_luma, + .ph_log2_diff_max_tt_min_qt_intra_slice_luma = ph->ph_log2_diff_max_tt_min_qt_intra_slice_luma, + .ph_log2_diff_min_qt_min_cb_intra_slice_chroma = ph->ph_log2_diff_min_qt_min_cb_intra_slice_chroma, + .ph_max_mtt_hierarchy_depth_intra_slice_chroma = ph->ph_max_mtt_hierarchy_depth_intra_slice_chroma, + .ph_log2_diff_max_bt_min_qt_intra_slice_chroma = ph->ph_log2_diff_max_bt_min_qt_intra_slice_chroma, + .ph_log2_diff_max_tt_min_qt_intra_slice_chroma = ph->ph_log2_diff_max_tt_min_qt_intra_slice_chroma, + .ph_cu_qp_delta_subdiv_intra_slice = ph->ph_cu_qp_delta_subdiv_intra_slice, + .ph_cu_chroma_qp_offset_subdiv_intra_slice = ph->ph_cu_chroma_qp_offset_subdiv_intra_slice, + .ph_log2_diff_min_qt_min_cb_inter_slice = ph->ph_log2_diff_min_qt_min_cb_inter_slice, + .ph_max_mtt_hierarchy_depth_inter_slice = ph->ph_max_mtt_hierarchy_depth_inter_slice, + .ph_log2_diff_max_bt_min_qt_inter_slice = ph->ph_log2_diff_max_bt_min_qt_inter_slice, + .ph_log2_diff_max_tt_min_qt_inter_slice = ph->ph_log2_diff_max_tt_min_qt_inter_slice, + .ph_cu_qp_delta_subdiv_inter_slice = ph->ph_cu_qp_delta_subdiv_inter_slice, + .ph_cu_chroma_qp_offset_subdiv_inter_slice = ph->ph_cu_chroma_qp_offset_subdiv_inter_slice, + .ph_flags.bits= { + .ph_non_ref_pic_flag = ph->ph_non_ref_pic_flag, + .ph_alf_enabled_flag = ph->ph_alf_enabled_flag, + .ph_alf_cb_enabled_flag = ph->ph_alf_cb_enabled_flag, + .ph_alf_cr_enabled_flag = ph->ph_alf_cr_enabled_flag, + .ph_alf_cc_cb_enabled_flag = ph->ph_alf_cc_cb_enabled_flag, + .ph_alf_cc_cr_enabled_flag = ph->ph_alf_cc_cr_enabled_flag, + .ph_lmcs_enabled_flag = ph->ph_lmcs_enabled_flag, + .ph_chroma_residual_scale_flag = 
ph->ph_chroma_residual_scale_flag, + .ph_explicit_scaling_list_enabled_flag = ph->ph_explicit_scaling_list_enabled_flag, + .ph_virtual_boundaries_present_flag = ph->ph_virtual_boundaries_present_flag, + .ph_temporal_mvp_enabled_flag = ph->ph_temporal_mvp_enabled_flag, + .ph_mmvd_fullpel_only_flag = ph->ph_mmvd_fullpel_only_flag, + .ph_mvd_l1_zero_flag = ph->ph_mvd_l1_zero_flag, + .ph_bdof_disabled_flag = ph->ph_bdof_disabled_flag, + .ph_dmvr_disabled_flag = ph->ph_dmvr_disabled_flag, + .ph_prof_disabled_flag = ph->ph_prof_disabled_flag, + .ph_joint_cbcr_sign_flag = ph->ph_joint_cbcr_sign_flag, + .ph_sao_luma_enabled_flag = ph->ph_sao_luma_enabled_flag, + .ph_sao_chroma_enabled_flag = ph->ph_sao_chroma_enabled_flag, + .ph_deblocking_filter_disabled_flag = ph->ph_deblocking_filter_disabled_flag, + }, + .PicMiscFlags.fields = { + .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag ? 0 : IS_IRAP(h) ? 1 : 0, + } + }; + + fill_vaapi_pic(&pic_param->CurrPic, fc->ref); + fill_vaapi_reference_frames(fc, pic_param); + + for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) + for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++) + pic_param->ChromaQpTable[i][j] = fc->ps.sps->chroma_qp_table[i][j]; + for (i = 0; i < 4; i++) { + pic_param->sps_ladf_qp_offset[i] = sps->sps_ladf_qp_offset[i]; + pic_param->sps_ladf_delta_threshold_minus1[i] = sps->sps_ladf_delta_threshold_minus1[i]; + } + + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? sps->sps_num_ver_virtual_boundaries : ph->ph_num_ver_virtual_boundaries); i++) { + pic_param->VirtualBoundaryPosX[i] = (sps->sps_virtual_boundaries_present_flag ? + (sps->sps_virtual_boundary_pos_x_minus1[ i ] + 1) : + (ph->ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8; + } + + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? sps->sps_num_hor_virtual_boundaries : ph->ph_num_hor_virtual_boundaries); i++) { + pic_param->VirtualBoundaryPosY[i] = (sps->sps_virtual_boundaries_present_flag ? 
+ (sps->sps_virtual_boundary_pos_y_minus1[ i ] + 1) : + (ph->ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8; + } + + for (i = 0; i < 6; i++) { + pic_param->pps_cb_qp_offset_list[i] = pps->pps_cb_qp_offset_list[i]; + pic_param->pps_cr_qp_offset_list[i] = pps->pps_cr_qp_offset_list[i]; + pic_param->pps_joint_cbcr_qp_offset_list[i] = pps->pps_joint_cbcr_qp_offset_list[i]; + } + + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VAPictureParameterBufferType, + &pic->pic_param, sizeof(VAPictureParameterBufferVVC)); + if (err < 0) + goto fail; + + for (i = 0; i <= sps->sps_num_subpics_minus1 && sps->sps_subpic_info_present_flag; i++) { + VASubPicVVC subpic_param = { + .sps_subpic_ctu_top_left_x = sps->sps_subpic_ctu_top_left_x[i], + .sps_subpic_ctu_top_left_y = sps->sps_subpic_ctu_top_left_y[i], + .sps_subpic_width_minus1 = sps->sps_subpic_width_minus1[i], + .sps_subpic_height_minus1 = sps->sps_subpic_height_minus1[i], + .SubpicIdVal = pps->sub_pic_id_val[i], + .subpic_flags.bits = { + .sps_subpic_treated_as_pic_flag = sps->sps_subpic_treated_as_pic_flag[i], + .sps_loop_filter_across_subpic_enabled_flag = sps->sps_loop_filter_across_subpic_enabled_flag[i], + } + }; + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VASubPicBufferType, + &subpic_param, sizeof(VASubPicVVC)); + if (err < 0) + goto fail; + } + + for (i = 0; i < VVC_MAX_ALF_COUNT; i++) { + const VVCALF *alf_list = h->ps.alf_list[i]; + if (alf_list) { + const H266RawAPS *alf = alf_list->r; + VAAlfDataVVC alf_param = { + .aps_adaptation_parameter_set_id = i, + .alf_luma_num_filters_signalled_minus1 = alf->alf_luma_num_filters_signalled_minus1, + .alf_chroma_num_alt_filters_minus1 = alf->alf_chroma_num_alt_filters_minus1, + .alf_cc_cb_filters_signalled_minus1 = alf->alf_cc_cb_filters_signalled_minus1, + .alf_cc_cr_filters_signalled_minus1 = alf->alf_cc_cr_filters_signalled_minus1, + .alf_flags.bits = { + .alf_luma_filter_signal_flag = alf->alf_luma_filter_signal_flag, + 
.alf_chroma_filter_signal_flag = alf->alf_chroma_filter_signal_flag, + .alf_cc_cb_filter_signal_flag = alf->alf_cc_cb_filter_signal_flag, + .alf_cc_cr_filter_signal_flag = alf->alf_cc_cr_filter_signal_flag, + .alf_luma_clip_flag = alf->alf_luma_clip_flag, + .alf_chroma_clip_flag = alf->alf_chroma_clip_flag, + } + }; + + for (j = 0; j < 25; j++) + alf_param.alf_luma_coeff_delta_idx[j] = alf->alf_luma_coeff_delta_idx[j]; + + for (j = 0; j < 25; j++) { + for (k = 0; k < 12; k++) { + alf_param.filtCoeff[j][k] = alf->alf_luma_coeff_abs[j][k] * (1 - 2 * alf->alf_luma_coeff_sign[j][k]); + alf_param.alf_luma_clip_idx[j][k] = alf->alf_luma_clip_idx[j][k]; + } + } + + for (j = 0; j < 8; j++) { + for (k = 0; k < 6; k++) { + alf_param.AlfCoeffC[j][k] = alf->alf_chroma_coeff_abs[j][k] * (1 - 2 * alf->alf_chroma_coeff_sign[j][k]); + alf_param.alf_chroma_clip_idx[j][k] = alf->alf_chroma_clip_idx[j][k]; + } + } + + for (j = 0; j < 4; j++) { + for (k = 0; k < 7; k++) { + if (alf->alf_cc_cb_mapped_coeff_abs[j][k]) + alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 * alf->alf_cc_cb_coeff_sign[j][k]) * (1 << (alf->alf_cc_cb_mapped_coeff_abs[j][k] - 1)); + if (alf->alf_cc_cr_mapped_coeff_abs[j][k]) + alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 * alf->alf_cc_cr_coeff_sign[j][k]) * (1 << (alf->alf_cc_cr_mapped_coeff_abs[j][k] - 1)); + } + } + + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VAAlfBufferType, + &alf_param, sizeof(VAAlfDataVVC)); + if (err < 0) + goto fail; + } + } + + for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) { + const H266RawAPS *lmcs = h->ps.lmcs_list[i]; + if (lmcs) { + VALmcsDataVVC lmcs_param = { + .aps_adaptation_parameter_set_id = i, + .lmcs_min_bin_idx = lmcs->lmcs_min_bin_idx, + .lmcs_delta_max_bin_idx = lmcs->lmcs_delta_max_bin_idx, + .lmcsDeltaCrs = (1 - 2 * lmcs->lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs, + }; + + for (j = lmcs->lmcs_min_bin_idx; j <= 15 - lmcs->lmcs_delta_max_bin_idx; j++) + lmcs_param.lmcsDeltaCW[j] = (1 - 2 * 
lmcs->lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j]; + + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VALmcsBufferType, + &lmcs_param, sizeof(VALmcsDataVVC)); + if (err < 0) + goto fail; + } + } + + for (i = 0; i < VVC_MAX_SL_COUNT; i++) { + const VVCScalingList *sl = h->ps.scaling_list[i]; + if (sl) { + int l; + + VAScalingListVVC sl_param = { + .aps_adaptation_parameter_set_id = i, + }; + + for (j = 0; j < 14; j++) + sl_param.ScalingMatrixDCRec[j] = sl->scaling_matrix_dc_rec[j]; + + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 2; l++) + sl_param.ScalingMatrixRec2x2[j][k][l] = sl->scaling_matrix_rec[j][l * 2 + k]; + + for (j = 2; j < 8; j++) + for (k = 0; k < 4; k++) + for (l = 0; l < 4; l++) + sl_param.ScalingMatrixRec4x4[j - 2][k][l] = sl->scaling_matrix_rec[j][l * 4 + k]; + + for (j = 8; j < 28; j++) + for (k = 0; k < 8; k++) + for (l = 0; l < 8; l++) + sl_param.ScalingMatrixRec8x8[j - 8][k][l] = sl->scaling_matrix_rec[j][l * 8 + k]; + + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VAIQMatrixBufferType, + &sl_param, sizeof(VAScalingListVVC)); + if (err < 0) + goto fail; + } + } + + for (i = 0; i <= pps->pps_num_exp_tile_columns_minus1; i++) { + tile_dim = pps->pps_tile_column_width_minus1[i]; + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VATileBufferType, + &tile_dim, sizeof(tile_dim)); + if (err < 0) + goto fail; + } + + for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) { + tile_dim = pps->pps_tile_row_height_minus1[i]; + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VATileBufferType, + &tile_dim, sizeof(tile_dim)); + if (err < 0) + goto fail; + } + + if (!pps->pps_no_pic_partition_flag && pps->pps_rect_slice_flag && !pps->pps_single_slice_per_subpic_flag) { + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { + for (j = 0; j < pps->pps_num_exp_slices_in_tile[i]; j++) { + exp_slice_height_in_ctus[i + j] = pps->pps_exp_slice_height_in_ctus_minus1[i][j] + 
1; + } + } + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { + VASliceStructVVC ss_param = { + .SliceTopLeftTileIdx = pps->slice_top_left_tile_idx[i], + .pps_slice_width_in_tiles_minus1 = pps->pps_slice_width_in_tiles_minus1[i], + .pps_slice_height_in_tiles_minus1 = pps->pps_slice_height_in_tiles_minus1[i], + }; + + if (pps->pps_slice_width_in_tiles_minus1[i] > 0 || pps->pps_slice_height_in_tiles_minus1[i] > 0) + ss_param.pps_exp_slice_height_in_ctus_minus1 = 0; + else { + if (pps->num_slices_in_tile[i] == 1) + ss_param.pps_exp_slice_height_in_ctus_minus1 = pps->row_height_val[pps->slice_top_left_tile_idx[i] / pps->num_tile_columns] - 1; + else if (exp_slice_height_in_ctus[i]) + ss_param.pps_exp_slice_height_in_ctus_minus1 = exp_slice_height_in_ctus[i] - 1; + else + continue; + } + + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, + VASliceStructBufferType, + &ss_param, sizeof(VASliceStructVVC)); + if (err < 0) + goto fail; + } + } + + return 0; + +fail: + ff_vaapi_decode_cancel(avctx, &pic->pic); + return err; +} + +static uint8_t get_ref_pic_index(const VVCContext *h, const VVCFrame *frame) +{ + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs]; + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; + VAPictureParameterBufferVVC *pp = (VAPictureParameterBufferVVC *)&pic->pic_param; + uint8_t i; + + if (!frame) + return 0xFF; + + for (i = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) { + VASurfaceID pid = pp->ReferenceFrames[i].picture_id; + int poc = pp->ReferenceFrames[i].pic_order_cnt; + if (pid != VA_INVALID_ID && pid == ff_vaapi_get_surface_id(frame->frame) && poc == frame->poc) + return i; + } + + return 0xFF; +} + +static int get_slice_data_offset(const uint8_t *buffer, uint32_t size, const SliceContext* sc) +{ + const H266RawSlice *slice = sc->ref; + int num_identical_bytes = slice->data_size < 32 ? 
slice->data_size : 32; + + for (int i = 0; i < size; i++) { + int skip_bytes = 0; + if (i >=2 && buffer[i] == 0x03 && !buffer[i - 1] && !buffer[i - 2]) + continue; + + for (int j = 0; j < num_identical_bytes; j++) { + if (i >= 2 && buffer[i + j + skip_bytes] == 0x03 && !buffer[i + j + skip_bytes - 1] && !buffer[i + j + skip_bytes - 2]) + skip_bytes++; + + if (buffer[i + j + skip_bytes] != slice->data[j]) + break; + + if (j + 1 == num_identical_bytes) + return i; + } + } + + return 0; +} + +static int vaapi_vvc_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, + uint32_t size) +{ + const VVCContext *h = avctx->priv_data; + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs]; + const SliceContext *sc = fc->slices[fc->nb_slices]; + const H266RawPPS *pps = fc->ps.pps->r; + const H266RawPictureHeader *ph = fc->ps.ph.r; + const H266RawSliceHeader *sh = sc->sh.r; + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; + VASliceParameterBufferVVC *slice_param = &pic->slice_param; + int nb_list, i, err; + + *slice_param = (VASliceParameterBufferVVC) { + .slice_data_size = size, + .slice_data_offset = 0, + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL, + .slice_data_byte_offset = get_slice_data_offset(buffer, size, sc), + .sh_subpic_id = sh->sh_subpic_id, + .sh_slice_address = sh->sh_slice_address, + .sh_num_tiles_in_slice_minus1 = sh->sh_num_tiles_in_slice_minus1, + .sh_slice_type = sh->sh_slice_type, + .sh_num_alf_aps_ids_luma = sh->sh_num_alf_aps_ids_luma, + .sh_alf_aps_id_chroma = sh->sh_alf_aps_id_chroma, + .sh_alf_cc_cb_aps_id = sh->sh_alf_cc_cb_aps_id, + .sh_alf_cc_cr_aps_id = sh->sh_alf_cc_cr_aps_id, + .NumRefIdxActive[0] = sh->num_ref_idx_active[0], + .NumRefIdxActive[1] = sh->num_ref_idx_active[1], + .sh_collocated_ref_idx = sh->sh_collocated_ref_idx, + .SliceQpY = pps->pps_qp_delta_info_in_ph_flag ? 
+ 26 + pps->pps_init_qp_minus26 + ph->ph_qp_delta : + 26 + pps->pps_init_qp_minus26 + sh->sh_qp_delta, + .sh_cb_qp_offset = sh->sh_cb_qp_offset, + .sh_cr_qp_offset = sh->sh_cr_qp_offset, + .sh_joint_cbcr_qp_offset = sh->sh_joint_cbcr_qp_offset, + .sh_luma_beta_offset_div2 = sh->sh_luma_beta_offset_div2, + .sh_luma_tc_offset_div2 = sh->sh_luma_tc_offset_div2, + .sh_cb_beta_offset_div2 = sh->sh_cb_beta_offset_div2, + .sh_cb_tc_offset_div2 = sh->sh_cb_tc_offset_div2, + .sh_cr_beta_offset_div2 = sh->sh_cr_beta_offset_div2, + .sh_cr_tc_offset_div2 = sh->sh_cr_tc_offset_div2, + .WPInfo = { + .luma_log2_weight_denom = sh->sh_pred_weight_table.luma_log2_weight_denom, + .delta_chroma_log2_weight_denom = sh->sh_pred_weight_table.delta_chroma_log2_weight_denom, + .num_l0_weights = sh->sh_pred_weight_table.num_l0_weights, + .num_l1_weights = sh->sh_pred_weight_table.num_l1_weights, + }, + .sh_flags.bits = { + .sh_alf_enabled_flag = sh->sh_alf_enabled_flag, + .sh_alf_cb_enabled_flag = sh->sh_alf_cb_enabled_flag, + .sh_alf_cr_enabled_flag = sh->sh_alf_cr_enabled_flag, + .sh_alf_cc_cb_enabled_flag = sh->sh_alf_cc_cb_enabled_flag, + .sh_alf_cc_cr_enabled_flag = sh->sh_alf_cc_cr_enabled_flag, + .sh_lmcs_used_flag = sh->sh_lmcs_used_flag, + .sh_explicit_scaling_list_used_flag = sh->sh_explicit_scaling_list_used_flag, + .sh_cabac_init_flag = sh->sh_cabac_init_flag, + .sh_collocated_from_l0_flag = sh->sh_collocated_from_l0_flag, + .sh_cu_chroma_qp_offset_enabled_flag = sh->sh_cu_chroma_qp_offset_enabled_flag, + .sh_sao_luma_used_flag = sh->sh_sao_luma_used_flag, + .sh_sao_chroma_used_flag = sh->sh_sao_chroma_used_flag, + .sh_deblocking_filter_disabled_flag = sh->sh_deblocking_filter_disabled_flag, + .sh_dep_quant_used_flag = sh->sh_dep_quant_used_flag, + .sh_sign_data_hiding_used_flag = sh->sh_sign_data_hiding_used_flag, + .sh_ts_residual_coding_disabled_flag = sh->sh_ts_residual_coding_disabled_flag, + }, + }; + + memset(&slice_param->RefPicList, 0xFF, 
sizeof(slice_param->RefPicList)); + + nb_list = (sh->sh_slice_type == VVC_SLICE_TYPE_B) ? + 2 : (sh->sh_slice_type == VVC_SLICE_TYPE_I ? 0 : 1); + for (int list_idx = 0; list_idx < nb_list; list_idx++) { + RefPicList *rpl = &sc->rpl[list_idx]; + + for (i = 0; i < rpl->nb_refs; i++) + slice_param->RefPicList[list_idx][i] = get_ref_pic_index(h, rpl->ref[i]); + } + + for (i = 0; i < 7; i++) + slice_param->sh_alf_aps_id_luma[i] = sh->sh_alf_aps_id_luma[i]; + + for (i = 0; i < 15; i++) { + slice_param->WPInfo.luma_weight_l0_flag[i] = sh->sh_pred_weight_table.luma_weight_l0_flag[i]; + slice_param->WPInfo.chroma_weight_l0_flag[i] = sh->sh_pred_weight_table.chroma_weight_l0_flag[i]; + slice_param->WPInfo.delta_luma_weight_l0[i] = sh->sh_pred_weight_table.delta_luma_weight_l0[i]; + slice_param->WPInfo.luma_offset_l0[i] = sh->sh_pred_weight_table.luma_offset_l0[i]; + slice_param->WPInfo.luma_weight_l1_flag[i] = sh->sh_pred_weight_table.luma_weight_l1_flag[i]; + slice_param->WPInfo.chroma_weight_l1_flag[i] = sh->sh_pred_weight_table.chroma_weight_l1_flag[i]; + slice_param->WPInfo.delta_luma_weight_l1[i] = sh->sh_pred_weight_table.delta_luma_weight_l1[i]; + slice_param->WPInfo.luma_offset_l1[i] = sh->sh_pred_weight_table.luma_offset_l1[i]; + } + + for (i = 0; i < 15; i++) { + for (int j = 0; j < 2; j++) { + slice_param->WPInfo.delta_chroma_weight_l0[i][j] = sh->sh_pred_weight_table.delta_chroma_weight_l0[i][j]; + slice_param->WPInfo.delta_chroma_offset_l0[i][j] = sh->sh_pred_weight_table.delta_chroma_offset_l0[i][j]; + slice_param->WPInfo.delta_chroma_weight_l1[i][j] = sh->sh_pred_weight_table.delta_chroma_weight_l1[i][j]; + slice_param->WPInfo.delta_chroma_offset_l1[i][j] = sh->sh_pred_weight_table.delta_chroma_offset_l1[i][j]; + } + } + + err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic, + &pic->slice_param, + sizeof(VASliceParameterBufferVVC), + buffer, size); + if (err) { + ff_vaapi_decode_cancel(avctx, &pic->pic); + return err; + } + + return 0; +} + +static int 
vaapi_vvc_end_frame(AVCodecContext *avctx) +{ + + const VVCContext *h = avctx->priv_data; + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % h->nb_fcs]; + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; + int ret; + + ret = ff_vaapi_decode_issue(avctx, &pic->pic); + if (ret < 0) + goto fail; + + pic->decode_issued = 1; + + return 0; + +fail: + ff_vaapi_decode_cancel(avctx, &pic->pic); + return ret; +} + +const FFHWAccel ff_vvc_vaapi_hwaccel = { + .p.name = "vvc_vaapi", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_VVC, + .p.pix_fmt = AV_PIX_FMT_VAAPI, + .start_frame = &vaapi_vvc_start_frame, + .end_frame = &vaapi_vvc_end_frame, + .decode_slice = &vaapi_vvc_decode_slice, + .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC), + .init = &ff_vaapi_decode_init, + .uninit = &ff_vaapi_decode_uninit, + .frame_params = &ff_vaapi_common_frame_params, + .priv_data_size = sizeof(VAAPIDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; diff --git a/libavcodec/version.h b/libavcodec/version.h index 06631ffa8c..7aa95fc3f1 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #include "version_major.h" -#define LIBAVCODEC_VERSION_MINOR 4 +#define LIBAVCODEC_VERSION_MINOR 5 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c index f2e269ce76..b204a0c73a 100644 --- a/libavcodec/vvc/vvcdec.c +++ b/libavcodec/vvc/vvcdec.c @@ -29,6 +29,7 @@ #include "libavutil/cpu.h" #include "libavutil/thread.h" +#include "config_components.h" #include "vvcdec.h" #include "vvc_ctu.h" #include "vvc_data.h" @@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc, VVCContext *s, VVCFrameContext *fc, static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS *sps) { -#define HWACCEL_MAX 0 +#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; 
switch (sps->pix_fmt) { case AV_PIX_FMT_YUV420P: +#if CONFIG_VVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; +#endif break; case AV_PIX_FMT_YUV420P10: +#if CONFIG_VVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; +#endif break; } @@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = { .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING | FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS, .p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles), + .hw_configs = (const AVCodecHWConfigInternal *const []) { +#if CONFIG_VVC_VAAPI_HWACCEL + HWACCEL_VAAPI(vvc), +#endif + NULL + }, }; -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com @ 2024-04-02 12:48 ` Nuo Mi 2024-04-03 3:31 ` Wang, Fei W 0 siblings, 1 reply; 14+ messages in thread From: Nuo Mi @ 2024-04-02 12:48 UTC (permalink / raw) To: FFmpeg development discussions and patches; +Cc: fei.w.wang On Thu, Mar 28, 2024 at 9:27 AM <fei.w.wang-at-intel.com@ffmpeg.org> wrote: > From: Fei Wang <fei.w.wang@intel.com> > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > --- > Changelog | 4 + > configure | 3 + > libavcodec/Makefile | 1 + > libavcodec/hwaccels.h | 1 + > libavcodec/vaapi_decode.c | 7 + > libavcodec/vaapi_vvc.c | 657 ++++++++++++++++++++++++++++++++++++++ > libavcodec/version.h | 2 +- > libavcodec/vvc/vvcdec.c | 15 +- > 8 files changed, 688 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/vaapi_vvc.c > > diff --git a/Changelog b/Changelog > index e83a00e35c..3108e65558 100644 > --- a/Changelog > +++ b/Changelog > @@ -1,6 +1,10 @@ > Entries are sorted chronologically from oldest to youngest within each > release, > releases are sorted from youngest to oldest. 
> > +version <next>: > +- VVC VAAPI decoder > + > + > version 7.0: > - DXV DXT1 encoder > - LEAD MCMP decoder > diff --git a/configure b/configure > index 2a1d22310b..d902c9adc8 100755 > --- a/configure > +++ b/configure > @@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9" > vp9_vdpau_hwaccel_select="vp9_decoder" > vp9_videotoolbox_hwaccel_deps="videotoolbox" > vp9_videotoolbox_hwaccel_select="vp9_decoder" > +vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC" > +vvc_vaapi_hwaccel_select="vvc_decoder" > wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel" > wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel" > wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel" > @@ -7250,6 +7252,7 @@ if enabled vaapi; then > check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, 0)" > > check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC" > + check_type "va/va.h va/va_dec_vvc.h" "VAPictureParameterBufferVVC" > check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth > check_struct "va/va.h" "VADecPictureParameterBufferAV1" bit_depth_idx > check_type "va/va.h va/va_vpp.h" > "VAProcFilterParameterBufferHDRToneMapping" > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 9ce6d445c1..343b037636 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += > vaapi_vp9.o > OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o > OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o > OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o > +OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o > > # Objects duplicated from other libraries for shared builds > SHLIBOBJS += log2_tab.o reverse.o > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h > index 5171e4c7d7..88d6b9a9b5 100644 > --- a/libavcodec/hwaccels.h > +++ b/libavcodec/hwaccels.h > @@ -82,6 +82,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel; > extern const struct FFHWAccel ff_vp9_vaapi_hwaccel; > 
extern const struct FFHWAccel ff_vp9_vdpau_hwaccel; > extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel; > +extern const struct FFHWAccel ff_vvc_vaapi_hwaccel; > extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel; > extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel; > extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel; > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c > index 1b1972a2a9..ceeb1f1a12 100644 > --- a/libavcodec/vaapi_decode.c > +++ b/libavcodec/vaapi_decode.c > @@ -455,6 +455,9 @@ static const struct { > MAP(AV1, AV1_MAIN, AV1Profile0), > MAP(AV1, AV1_HIGH, AV1Profile1), > #endif > +#if VA_CHECK_VERSION(1, 22, 0) > + MAP(H266, VVC_MAIN_10, VVCMain10), > +#endif > > #undef MAP > }; > @@ -627,6 +630,10 @@ static int vaapi_decode_make_config(AVCodecContext > *avctx, > case AV_CODEC_ID_VP8: > frames->initial_pool_size += 3; > break; > + case AV_CODEC_ID_H266: > + // Add additional 16 for maximum 16 frames delay in vvc > native decode. > + frames->initial_pool_size += 32; > One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames amount to approximately 6GB. Can we dynamically allocate the buffer pool? The software decoder requires a delay of 16 frames to ensure full utilization of CPUs. In the future, we may consider increasing this to 32 or even 64 frames. However, for hardware decoding, given that all processing occurs on the GPU, we do not require any delay. + break; > default: > frames->initial_pool_size += 2; > } > diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c > new file mode 100644 > index 0000000000..6141005688 > --- /dev/null > +++ b/libavcodec/vaapi_vvc.c > @@ -0,0 +1,657 @@ > +/* > + * VVC HW decode acceleration through VA API > + * > + * Copyright (c) 2024 Intel Corporation > + * > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +#include <va/va.h> > +#include <va/va_dec_vvc.h> > + > +#include "vvc/vvcdec.h" > +#include "vvc/vvc_refs.h" > +#include "hwaccel_internal.h" > +#include "vaapi_decode.h" > + > +typedef struct VAAPIDecodePictureVVC { > + VAAPIDecodePicture pic; > + VAPictureParameterBufferVVC pic_param; > + VASliceParameterBufferVVC slice_param; > + int decode_issued; > +} VAAPIDecodePictureVVC; > + > +static void init_vaapi_pic(VAPictureVVC *va_pic) > +{ > + va_pic->picture_id = VA_INVALID_ID; > + va_pic->flags = VA_PICTURE_VVC_INVALID; > + va_pic->pic_order_cnt = 0; > +} > + > +static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame *pic) > +{ > + va_pic->picture_id = ff_vaapi_get_surface_id(pic->frame); > + va_pic->pic_order_cnt = pic->poc; > + va_pic->flags = 0; > + > + if (pic->flags & VVC_FRAME_FLAG_LONG_REF) > + va_pic->flags |= VA_PICTURE_VVC_LONG_TERM_REFERENCE; > +} > + > +static void fill_vaapi_reference_frames(const VVCFrameContext *h, > VAPictureParameterBufferVVC *pp) > +{ > + const VVCFrame *current_picture = h->ref; > + int i, j; > + > + for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) { > + const VVCFrame *frame = NULL; > + > + while (!frame && j < FF_ARRAY_ELEMS(h->DPB)) { > 
+ if ((&h->DPB[j] != current_picture ) && > + (h->DPB[j].flags & (VVC_FRAME_FLAG_LONG_REF | > VVC_FRAME_FLAG_SHORT_REF))) > + frame = &h->DPB[j]; > + j++; > + } > + > + init_vaapi_pic(&pp->ReferenceFrames[i]); > + > + if (frame) { > + VAAPIDecodePictureVVC *pic; > + fill_vaapi_pic(&pp->ReferenceFrames[i], frame); > + pic = frame->hwaccel_picture_private; > + if (!pic->decode_issued) > + pp->ReferenceFrames[i].flags |= > VA_PICTURE_VVC_UNAVAILABLE_REFERENCE; > + } > + } > +} > + > +static int vaapi_vvc_start_frame(AVCodecContext *avctx, > + av_unused const uint8_t *buffer, > + av_unused uint32_t size) > +{ > + const VVCContext *h = avctx->priv_data; > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + > h->nb_fcs) % h->nb_fcs]; > + const H266RawSPS *sps = fc->ps.sps->r; > + const H266RawPPS *pps = fc->ps.pps->r; > + const H266RawPictureHeader *ph = fc->ps.ph.r; > + VAAPIDecodePictureVVC *pic = > fc->ref->hwaccel_picture_private; > + VAPictureParameterBufferVVC *pic_param = &pic->pic_param; > + uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] = {0}; > + int i, j, k, err; > + > + pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref->frame); > + > + *pic_param = (VAPictureParameterBufferVVC) { > + .pps_pic_width_in_luma_samples = > pps->pps_pic_width_in_luma_samples, > + .pps_pic_height_in_luma_samples = > pps->pps_pic_height_in_luma_samples, > + .sps_num_subpics_minus1 = > sps->sps_num_subpics_minus1, > + .sps_chroma_format_idc = > sps->sps_chroma_format_idc, > + .sps_bitdepth_minus8 = > sps->sps_bitdepth_minus8, > + .sps_log2_ctu_size_minus5 = > sps->sps_log2_ctu_size_minus5, > + .sps_log2_min_luma_coding_block_size_minus2 = > sps->sps_log2_min_luma_coding_block_size_minus2, > + .sps_log2_transform_skip_max_size_minus2 = > sps->sps_log2_transform_skip_max_size_minus2, > + .sps_six_minus_max_num_merge_cand = > sps->sps_six_minus_max_num_merge_cand, > + .sps_five_minus_max_num_subblock_merge_cand = > sps->sps_five_minus_max_num_subblock_merge_cand, > + 
.sps_max_num_merge_cand_minus_max_num_gpm_cand = > sps->sps_max_num_merge_cand_minus_max_num_gpm_cand, > + .sps_log2_parallel_merge_level_minus2 = > sps->sps_log2_parallel_merge_level_minus2, > + .sps_min_qp_prime_ts = > sps->sps_min_qp_prime_ts, > + .sps_six_minus_max_num_ibc_merge_cand = > sps->sps_six_minus_max_num_ibc_merge_cand, > + .sps_num_ladf_intervals_minus2 = > sps->sps_num_ladf_intervals_minus2, > + .sps_ladf_lowest_interval_qp_offset = > sps->sps_ladf_lowest_interval_qp_offset, > + .sps_flags.bits = { > + .sps_subpic_info_present_flag > = sps->sps_subpic_info_present_flag, > + .sps_independent_subpics_flag > = sps->sps_independent_subpics_flag, > + .sps_subpic_same_size_flag > = sps->sps_subpic_same_size_flag, > + .sps_entropy_coding_sync_enabled_flag > = sps->sps_entropy_coding_sync_enabled_flag, > + .sps_qtbtt_dual_tree_intra_flag > = sps->sps_qtbtt_dual_tree_intra_flag, > + .sps_max_luma_transform_size_64_flag > = sps->sps_max_luma_transform_size_64_flag, > + .sps_transform_skip_enabled_flag > = sps->sps_transform_skip_enabled_flag, > + .sps_bdpcm_enabled_flag > = sps->sps_bdpcm_enabled_flag, > + .sps_mts_enabled_flag > = sps->sps_mts_enabled_flag, > + .sps_explicit_mts_intra_enabled_flag > = sps->sps_explicit_mts_intra_enabled_flag, > + .sps_explicit_mts_inter_enabled_flag > = sps->sps_explicit_mts_inter_enabled_flag, > + .sps_lfnst_enabled_flag > = sps->sps_lfnst_enabled_flag, > + .sps_joint_cbcr_enabled_flag > = sps->sps_joint_cbcr_enabled_flag, > + .sps_same_qp_table_for_chroma_flag > = sps->sps_same_qp_table_for_chroma_flag, > + .sps_sao_enabled_flag > = sps->sps_sao_enabled_flag, > + .sps_alf_enabled_flag > = sps->sps_alf_enabled_flag, > + .sps_ccalf_enabled_flag > = sps->sps_alf_enabled_flag, > + .sps_lmcs_enabled_flag > = sps->sps_lmcs_enabled_flag, > + .sps_sbtmvp_enabled_flag > = sps->sps_sbtmvp_enabled_flag, > + .sps_amvr_enabled_flag > = sps->sps_amvr_enabled_flag, > + .sps_smvd_enabled_flag > = sps->sps_smvd_enabled_flag, > + 
.sps_mmvd_enabled_flag > = sps->sps_mmvd_enabled_flag, > + .sps_sbt_enabled_flag > = sps->sps_sbt_enabled_flag, > + .sps_affine_enabled_flag > = sps->sps_affine_enabled_flag, > + .sps_6param_affine_enabled_flag > = sps->sps_6param_affine_enabled_flag, > + .sps_affine_amvr_enabled_flag > = sps->sps_affine_amvr_enabled_flag, > + .sps_affine_prof_enabled_flag > = sps->sps_affine_prof_enabled_flag, > + .sps_bcw_enabled_flag > = sps->sps_bcw_enabled_flag, > + .sps_ciip_enabled_flag > = sps->sps_ciip_enabled_flag, > + .sps_gpm_enabled_flag > = sps->sps_gpm_enabled_flag, > + .sps_isp_enabled_flag > = sps->sps_isp_enabled_flag, > + .sps_mrl_enabled_flag > = sps->sps_mrl_enabled_flag, > + .sps_mip_enabled_flag > = sps->sps_mip_enabled_flag, > + .sps_cclm_enabled_flag > = sps->sps_cclm_enabled_flag, > + .sps_chroma_horizontal_collocated_flag > = sps->sps_chroma_horizontal_collocated_flag, > + .sps_chroma_vertical_collocated_flag > = sps->sps_chroma_vertical_collocated_flag, > + .sps_palette_enabled_flag > = sps->sps_palette_enabled_flag, > + .sps_act_enabled_flag > = sps->sps_act_enabled_flag, > + .sps_ibc_enabled_flag > = sps->sps_ibc_enabled_flag, > + .sps_ladf_enabled_flag > = sps->sps_ladf_enabled_flag, > + .sps_explicit_scaling_list_enabled_flag > = sps->sps_explicit_scaling_list_enabled_flag, > + .sps_scaling_matrix_for_lfnst_disabled_flag > = sps->sps_scaling_matrix_for_lfnst_disabled_flag, > + > .sps_scaling_matrix_for_alternative_colour_space_disabled_flag = > sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag, > + .sps_scaling_matrix_designated_colour_space_flag > = sps->sps_scaling_matrix_designated_colour_space_flag, > + .sps_virtual_boundaries_enabled_flag > = sps->sps_virtual_boundaries_enabled_flag, > + .sps_virtual_boundaries_present_flag > = sps->sps_virtual_boundaries_present_flag, > + }, > + .NumVerVirtualBoundaries = > sps->sps_virtual_boundaries_present_flag ? 
> + > sps->sps_num_ver_virtual_boundaries : > + > ph->ph_num_ver_virtual_boundaries, > + .NumHorVirtualBoundaries = > sps->sps_virtual_boundaries_present_flag ? > + > sps->sps_num_hor_virtual_boundaries : > + > ph->ph_num_hor_virtual_boundaries, > + .pps_scaling_win_left_offset = > pps->pps_scaling_win_left_offset, > + .pps_scaling_win_right_offset = > pps->pps_scaling_win_right_offset, > + .pps_scaling_win_top_offset = > pps->pps_scaling_win_top_offset, > + .pps_scaling_win_bottom_offset = > pps->pps_scaling_win_bottom_offset, > + .pps_num_exp_tile_columns_minus1 = > pps->pps_num_exp_tile_columns_minus1, > + .pps_num_exp_tile_rows_minus1 = > pps->pps_num_exp_tile_rows_minus1, > + .pps_num_slices_in_pic_minus1 = > pps->pps_num_slices_in_pic_minus1, > + .pps_pic_width_minus_wraparound_offset = > pps->pps_pic_width_minus_wraparound_offset, > + .pps_cb_qp_offset = pps->pps_cb_qp_offset, > + .pps_cr_qp_offset = pps->pps_cr_qp_offset, > + .pps_joint_cbcr_qp_offset_value = > pps->pps_joint_cbcr_qp_offset_value, > + .pps_chroma_qp_offset_list_len_minus1 = > pps->pps_chroma_qp_offset_list_len_minus1, > + .pps_flags.bits = { > + .pps_loop_filter_across_tiles_enabled_flag = > pps->pps_loop_filter_across_tiles_enabled_flag, > + .pps_rect_slice_flag = > pps->pps_rect_slice_flag, > + .pps_single_slice_per_subpic_flag = > pps->pps_single_slice_per_subpic_flag, > + .pps_loop_filter_across_slices_enabled_flag = > pps->pps_loop_filter_across_slices_enabled_flag, > + .pps_weighted_pred_flag = > pps->pps_weighted_pred_flag, > + .pps_weighted_bipred_flag = > pps->pps_weighted_bipred_flag, > + .pps_ref_wraparound_enabled_flag = > pps->pps_ref_wraparound_enabled_flag, > + .pps_cu_qp_delta_enabled_flag = > pps->pps_cu_qp_delta_enabled_flag, > + .pps_cu_chroma_qp_offset_list_enabled_flag = > pps->pps_cu_chroma_qp_offset_list_enabled_flag, > + .pps_deblocking_filter_override_enabled_flag = > pps->pps_deblocking_filter_override_enabled_flag, > + .pps_deblocking_filter_disabled_flag = > 
pps->pps_deblocking_filter_disabled_flag, > + .pps_dbf_info_in_ph_flag = > pps->pps_dbf_info_in_ph_flag, > + .pps_sao_info_in_ph_flag = > pps->pps_sao_info_in_ph_flag, > + .pps_alf_info_in_ph_flag = > pps->pps_alf_info_in_ph_flag, > + }, > + .ph_lmcs_aps_id = > ph->ph_lmcs_aps_id, > + .ph_scaling_list_aps_id = > ph->ph_scaling_list_aps_id, > + .ph_log2_diff_min_qt_min_cb_intra_slice_luma = > ph->ph_log2_diff_min_qt_min_cb_intra_slice_luma, > + .ph_max_mtt_hierarchy_depth_intra_slice_luma = > ph->ph_max_mtt_hierarchy_depth_intra_slice_luma, > + .ph_log2_diff_max_bt_min_qt_intra_slice_luma = > ph->ph_log2_diff_max_bt_min_qt_intra_slice_luma, > + .ph_log2_diff_max_tt_min_qt_intra_slice_luma = > ph->ph_log2_diff_max_tt_min_qt_intra_slice_luma, > + .ph_log2_diff_min_qt_min_cb_intra_slice_chroma = > ph->ph_log2_diff_min_qt_min_cb_intra_slice_chroma, > + .ph_max_mtt_hierarchy_depth_intra_slice_chroma = > ph->ph_max_mtt_hierarchy_depth_intra_slice_chroma, > + .ph_log2_diff_max_bt_min_qt_intra_slice_chroma = > ph->ph_log2_diff_max_bt_min_qt_intra_slice_chroma, > + .ph_log2_diff_max_tt_min_qt_intra_slice_chroma = > ph->ph_log2_diff_max_tt_min_qt_intra_slice_chroma, > + .ph_cu_qp_delta_subdiv_intra_slice = > ph->ph_cu_qp_delta_subdiv_intra_slice, > + .ph_cu_chroma_qp_offset_subdiv_intra_slice = > ph->ph_cu_chroma_qp_offset_subdiv_intra_slice, > + .ph_log2_diff_min_qt_min_cb_inter_slice = > ph->ph_log2_diff_min_qt_min_cb_inter_slice, > + .ph_max_mtt_hierarchy_depth_inter_slice = > ph->ph_max_mtt_hierarchy_depth_inter_slice, > + .ph_log2_diff_max_bt_min_qt_inter_slice = > ph->ph_log2_diff_max_bt_min_qt_inter_slice, > + .ph_log2_diff_max_tt_min_qt_inter_slice = > ph->ph_log2_diff_max_tt_min_qt_inter_slice, > + .ph_cu_qp_delta_subdiv_inter_slice = > ph->ph_cu_qp_delta_subdiv_inter_slice, > + .ph_cu_chroma_qp_offset_subdiv_inter_slice = > ph->ph_cu_chroma_qp_offset_subdiv_inter_slice, > + .ph_flags.bits= { > + .ph_non_ref_pic_flag = > ph->ph_non_ref_pic_flag, > + 
.ph_alf_enabled_flag = > ph->ph_alf_enabled_flag, > + .ph_alf_cb_enabled_flag = > ph->ph_alf_cb_enabled_flag, > + .ph_alf_cr_enabled_flag = > ph->ph_alf_cr_enabled_flag, > + .ph_alf_cc_cb_enabled_flag = > ph->ph_alf_cc_cb_enabled_flag, > + .ph_alf_cc_cr_enabled_flag = > ph->ph_alf_cc_cr_enabled_flag, > + .ph_lmcs_enabled_flag = > ph->ph_lmcs_enabled_flag, > + .ph_chroma_residual_scale_flag = > ph->ph_chroma_residual_scale_flag, > + .ph_explicit_scaling_list_enabled_flag = > ph->ph_explicit_scaling_list_enabled_flag, > + .ph_virtual_boundaries_present_flag = > ph->ph_virtual_boundaries_present_flag, > + .ph_temporal_mvp_enabled_flag = > ph->ph_temporal_mvp_enabled_flag, > + .ph_mmvd_fullpel_only_flag = > ph->ph_mmvd_fullpel_only_flag, > + .ph_mvd_l1_zero_flag = > ph->ph_mvd_l1_zero_flag, > + .ph_bdof_disabled_flag = > ph->ph_bdof_disabled_flag, > + .ph_dmvr_disabled_flag = > ph->ph_dmvr_disabled_flag, > + .ph_prof_disabled_flag = > ph->ph_prof_disabled_flag, > + .ph_joint_cbcr_sign_flag = > ph->ph_joint_cbcr_sign_flag, > + .ph_sao_luma_enabled_flag = > ph->ph_sao_luma_enabled_flag, > + .ph_sao_chroma_enabled_flag = > ph->ph_sao_chroma_enabled_flag, > + .ph_deblocking_filter_disabled_flag = > ph->ph_deblocking_filter_disabled_flag, > + }, > + .PicMiscFlags.fields = { > + .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag ? 0 : > IS_IRAP(h) ? 1 : 0, > + } > + }; > + > + fill_vaapi_pic(&pic_param->CurrPic, fc->ref); > + fill_vaapi_reference_frames(fc, pic_param); > + > + for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) > + for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++) > + pic_param->ChromaQpTable[i][j] = > fc->ps.sps->chroma_qp_table[i][j]; > + for (i = 0; i < 4; i++) { > + pic_param->sps_ladf_qp_offset[i] = > sps->sps_ladf_qp_offset[i]; > + pic_param->sps_ladf_delta_threshold_minus1[i] = > sps->sps_ladf_delta_threshold_minus1[i]; > + } > + > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? 
> sps->sps_num_ver_virtual_boundaries : ph->ph_num_ver_virtual_boundaries); > i++) { > + pic_param->VirtualBoundaryPosX[i] = > (sps->sps_virtual_boundaries_present_flag ? > + > (sps->sps_virtual_boundary_pos_x_minus1[ i ] + 1) : > + > (ph->ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8; > + } > + > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? > sps->sps_num_hor_virtual_boundaries : ph->ph_num_hor_virtual_boundaries); > i++) { > + pic_param->VirtualBoundaryPosY[i] = > (sps->sps_virtual_boundaries_present_flag ? > + > (sps->sps_virtual_boundary_pos_y_minus1[ i ] + 1) : > + > (ph->ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8; > + } > + > + for (i = 0; i < 6; i++) { > + pic_param->pps_cb_qp_offset_list[i] = > pps->pps_cb_qp_offset_list[i]; > + pic_param->pps_cr_qp_offset_list[i] = > pps->pps_cr_qp_offset_list[i]; > + pic_param->pps_joint_cbcr_qp_offset_list[i] = > pps->pps_joint_cbcr_qp_offset_list[i]; > + } > + > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VAPictureParameterBufferType, > + &pic->pic_param, > sizeof(VAPictureParameterBufferVVC)); > + if (err < 0) > + goto fail; > + > + for (i = 0; i <= sps->sps_num_subpics_minus1 && > sps->sps_subpic_info_present_flag; i++) { > + VASubPicVVC subpic_param = { > + .sps_subpic_ctu_top_left_x = > sps->sps_subpic_ctu_top_left_x[i], > + .sps_subpic_ctu_top_left_y = > sps->sps_subpic_ctu_top_left_y[i], > + .sps_subpic_width_minus1 = sps->sps_subpic_width_minus1[i], > + .sps_subpic_height_minus1 = sps->sps_subpic_height_minus1[i], > + .SubpicIdVal = pps->sub_pic_id_val[i], > + .subpic_flags.bits = { > + .sps_subpic_treated_as_pic_flag = > sps->sps_subpic_treated_as_pic_flag[i], > + .sps_loop_filter_across_subpic_enabled_flag = > sps->sps_loop_filter_across_subpic_enabled_flag[i], > + } > + }; > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VASubPicBufferType, > + &subpic_param, > sizeof(VASubPicVVC)); > + if (err < 0) > + goto fail; > + } > + > + for (i = 0; i < 
VVC_MAX_ALF_COUNT; i++) { > + const VVCALF *alf_list = h->ps.alf_list[i]; > + if (alf_list) { > + const H266RawAPS *alf = alf_list->r; > + VAAlfDataVVC alf_param = { > + .aps_adaptation_parameter_set_id = i, > + .alf_luma_num_filters_signalled_minus1 = > alf->alf_luma_num_filters_signalled_minus1, > + .alf_chroma_num_alt_filters_minus1 = > alf->alf_chroma_num_alt_filters_minus1, > + .alf_cc_cb_filters_signalled_minus1 = > alf->alf_cc_cb_filters_signalled_minus1, > + .alf_cc_cr_filters_signalled_minus1 = > alf->alf_cc_cr_filters_signalled_minus1, > + .alf_flags.bits = { > + .alf_luma_filter_signal_flag = > alf->alf_luma_filter_signal_flag, > + .alf_chroma_filter_signal_flag = > alf->alf_chroma_filter_signal_flag, > + .alf_cc_cb_filter_signal_flag = > alf->alf_cc_cb_filter_signal_flag, > + .alf_cc_cr_filter_signal_flag = > alf->alf_cc_cr_filter_signal_flag, > + .alf_luma_clip_flag = > alf->alf_luma_clip_flag, > + .alf_chroma_clip_flag = > alf->alf_chroma_clip_flag, > + } > + }; > + > + for (j = 0; j < 25; j++) > + alf_param.alf_luma_coeff_delta_idx[j] = > alf->alf_luma_coeff_delta_idx[j]; > + > + for (j = 0; j < 25; j++) { > + for (k = 0; k < 12; k++) { > + alf_param.filtCoeff[j][k] = > alf->alf_luma_coeff_abs[j][k] * (1 - 2 * alf->alf_luma_coeff_sign[j][k]); > + alf_param.alf_luma_clip_idx[j][k] = > alf->alf_luma_clip_idx[j][k]; > + } > + } > + > + for (j = 0; j < 8; j++) { > + for (k = 0; k < 6; k++) { > + alf_param.AlfCoeffC[j][k] = > alf->alf_chroma_coeff_abs[j][k] * (1 - 2 * > alf->alf_chroma_coeff_sign[j][k]); > + alf_param.alf_chroma_clip_idx[j][k] = > alf->alf_chroma_clip_idx[j][k]; > + } > + } > + > + for (j = 0; j < 4; j++) { > + for (k = 0; k < 7; k++) { > + if (alf->alf_cc_cb_mapped_coeff_abs[j][k]) > + alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 * > alf->alf_cc_cb_coeff_sign[j][k]) * (1 << > (alf->alf_cc_cb_mapped_coeff_abs[j][k] - 1)); > + if (alf->alf_cc_cr_mapped_coeff_abs[j][k]) > + alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 * > 
alf->alf_cc_cr_coeff_sign[j][k]) * (1 << > (alf->alf_cc_cr_mapped_coeff_abs[j][k] - 1)); > + } > + } > + > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VAAlfBufferType, > + &alf_param, > sizeof(VAAlfDataVVC)); > + if (err < 0) > + goto fail; > + } > + } > + > + for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) { > + const H266RawAPS *lmcs = h->ps.lmcs_list[i]; > + if (lmcs) { > + VALmcsDataVVC lmcs_param = { > + .aps_adaptation_parameter_set_id = i, > + .lmcs_min_bin_idx = lmcs->lmcs_min_bin_idx, > + .lmcs_delta_max_bin_idx = > lmcs->lmcs_delta_max_bin_idx, > + .lmcsDeltaCrs = (1 - 2 * > lmcs->lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs, > + }; > + > + for (j = lmcs->lmcs_min_bin_idx; j <= 15 - > lmcs->lmcs_delta_max_bin_idx; j++) > + lmcs_param.lmcsDeltaCW[j] = (1 - 2 * > lmcs->lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j]; > + > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VALmcsBufferType, > + &lmcs_param, > sizeof(VALmcsDataVVC)); > + if (err < 0) > + goto fail; > + } > + } > + > + for (i = 0; i < VVC_MAX_SL_COUNT; i++) { > + const VVCScalingList *sl = h->ps.scaling_list[i]; > + if (sl) { > + int l; > + > + VAScalingListVVC sl_param = { > + .aps_adaptation_parameter_set_id = i, > + }; > + > + for (j = 0; j < 14; j++) > + sl_param.ScalingMatrixDCRec[j] = > sl->scaling_matrix_dc_rec[j]; > + > + for (j = 0; j < 2; j++) > + for (k = 0; k < 2; k++) > + for (l = 0; l < 2; l++) > + sl_param.ScalingMatrixRec2x2[j][k][l] = > sl->scaling_matrix_rec[j][l * 2 + k]; > + > + for (j = 2; j < 8; j++) > + for (k = 0; k < 4; k++) > + for (l = 0; l < 4; l++) > + sl_param.ScalingMatrixRec4x4[j - 2][k][l] = > sl->scaling_matrix_rec[j][l * 4 + k]; > + > + for (j = 8; j < 28; j++) > + for (k = 0; k < 8; k++) > + for (l = 0; l < 8; l++) > + sl_param.ScalingMatrixRec8x8[j - 8][k][l] = > sl->scaling_matrix_rec[j][l * 8 + k]; > + > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VAIQMatrixBufferType, > + &sl_param, > 
sizeof(VAScalingListVVC)); > + if (err < 0) > + goto fail; > + } > + } > + > + for (i = 0; i <= pps->pps_num_exp_tile_columns_minus1; i++) { > + tile_dim = pps->pps_tile_column_width_minus1[i]; > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VATileBufferType, > + &tile_dim, > sizeof(tile_dim)); > + if (err < 0) > + goto fail; > + } > + > + for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) { > + tile_dim = pps->pps_tile_row_height_minus1[i]; > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + VATileBufferType, > + &tile_dim, > sizeof(tile_dim)); > + if (err < 0) > + goto fail; > + } > + > + if (!pps->pps_no_pic_partition_flag && pps->pps_rect_slice_flag && > !pps->pps_single_slice_per_subpic_flag) { > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { > + for (j = 0; j < pps->pps_num_exp_slices_in_tile[i]; j++) { > + exp_slice_height_in_ctus[i + j] = > pps->pps_exp_slice_height_in_ctus_minus1[i][j] + 1; > + } > + } > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { > + VASliceStructVVC ss_param = { > + .SliceTopLeftTileIdx = > pps->slice_top_left_tile_idx[i], > + .pps_slice_width_in_tiles_minus1 = > pps->pps_slice_width_in_tiles_minus1[i], > + .pps_slice_height_in_tiles_minus1 = > pps->pps_slice_height_in_tiles_minus1[i], > + }; > + > + if (pps->pps_slice_width_in_tiles_minus1[i] > 0 || > pps->pps_slice_height_in_tiles_minus1[i] > 0) > + ss_param.pps_exp_slice_height_in_ctus_minus1 = 0; > + else { > + if (pps->num_slices_in_tile[i] == 1) > + ss_param.pps_exp_slice_height_in_ctus_minus1 = > pps->row_height_val[pps->slice_top_left_tile_idx[i] / > pps->num_tile_columns] - 1; > + else if (exp_slice_height_in_ctus[i]) > + ss_param.pps_exp_slice_height_in_ctus_minus1 = > exp_slice_height_in_ctus[i] - 1; > + else > + continue; > + } > + > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > + > VASliceStructBufferType, > + &ss_param, > sizeof(VASliceStructVVC)); > + if (err < 0) > + goto fail; > + } > + 
} > + > + return 0; > + > +fail: > + ff_vaapi_decode_cancel(avctx, &pic->pic); > + return err; > +} > + > +static uint8_t get_ref_pic_index(const VVCContext *h, const VVCFrame > *frame) > +{ > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) > % h->nb_fcs]; > + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; > + VAPictureParameterBufferVVC *pp = (VAPictureParameterBufferVVC > *)&pic->pic_param; > + uint8_t i; > + > + if (!frame) > + return 0xFF; > + > + for (i = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) { > + VASurfaceID pid = pp->ReferenceFrames[i].picture_id; > + int poc = pp->ReferenceFrames[i].pic_order_cnt; > + if (pid != VA_INVALID_ID && pid == > ff_vaapi_get_surface_id(frame->frame) && poc == frame->poc) > + return i; > + } > + > + return 0xFF; > +} > + > +static int get_slice_data_offset(const uint8_t *buffer, uint32_t size, > const SliceContext* sc) > +{ > + const H266RawSlice *slice = sc->ref; > + int num_identical_bytes = slice->data_size < 32 ? 
slice->data_size > : 32; > + > + for (int i = 0; i < size; i++) { > + int skip_bytes = 0; > + if (i >=2 && buffer[i] == 0x03 && !buffer[i - 1] && !buffer[i - > 2]) > + continue; > + > + for (int j = 0; j < num_identical_bytes; j++) { > + if (i >= 2 && buffer[i + j + skip_bytes] == 0x03 && !buffer[i > + j + skip_bytes - 1] && !buffer[i + j + skip_bytes - 2]) > + skip_bytes++; > + > + if (buffer[i + j + skip_bytes] != slice->data[j]) > + break; > + > + if (j + 1 == num_identical_bytes) > + return i; > + } > + } > + > + return 0; > +} > + > +static int vaapi_vvc_decode_slice(AVCodecContext *avctx, > + const uint8_t *buffer, > + uint32_t size) > +{ > + const VVCContext *h = avctx->priv_data; > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + > h->nb_fcs) % h->nb_fcs]; > + const SliceContext *sc = fc->slices[fc->nb_slices]; > + const H266RawPPS *pps = fc->ps.pps->r; > + const H266RawPictureHeader *ph = fc->ps.ph.r; > + const H266RawSliceHeader *sh = sc->sh.r; > + VAAPIDecodePictureVVC *pic = > fc->ref->hwaccel_picture_private; > + VASliceParameterBufferVVC *slice_param = &pic->slice_param; > + int nb_list, i, err; > + > + *slice_param = (VASliceParameterBufferVVC) { > + .slice_data_size = size, > + .slice_data_offset = 0, > + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL, > + .slice_data_byte_offset = get_slice_data_offset(buffer, > size, sc), > + .sh_subpic_id = sh->sh_subpic_id, > + .sh_slice_address = sh->sh_slice_address, > + .sh_num_tiles_in_slice_minus1 = > sh->sh_num_tiles_in_slice_minus1, > + .sh_slice_type = sh->sh_slice_type, > + .sh_num_alf_aps_ids_luma = sh->sh_num_alf_aps_ids_luma, > + .sh_alf_aps_id_chroma = sh->sh_alf_aps_id_chroma, > + .sh_alf_cc_cb_aps_id = sh->sh_alf_cc_cb_aps_id, > + .sh_alf_cc_cr_aps_id = sh->sh_alf_cc_cr_aps_id, > + .NumRefIdxActive[0] = sh->num_ref_idx_active[0], > + .NumRefIdxActive[1] = sh->num_ref_idx_active[1], > + .sh_collocated_ref_idx = sh->sh_collocated_ref_idx, > + .SliceQpY = > pps->pps_qp_delta_info_in_ph_flag ? 
> + 26 + pps->pps_init_qp_minus26 > + ph->ph_qp_delta : > + 26 + pps->pps_init_qp_minus26 > + sh->sh_qp_delta, > + .sh_cb_qp_offset = sh->sh_cb_qp_offset, > + .sh_cr_qp_offset = sh->sh_cr_qp_offset, > + .sh_joint_cbcr_qp_offset = sh->sh_joint_cbcr_qp_offset, > + .sh_luma_beta_offset_div2 = sh->sh_luma_beta_offset_div2, > + .sh_luma_tc_offset_div2 = sh->sh_luma_tc_offset_div2, > + .sh_cb_beta_offset_div2 = sh->sh_cb_beta_offset_div2, > + .sh_cb_tc_offset_div2 = sh->sh_cb_tc_offset_div2, > + .sh_cr_beta_offset_div2 = sh->sh_cr_beta_offset_div2, > + .sh_cr_tc_offset_div2 = sh->sh_cr_tc_offset_div2, > + .WPInfo = { > + .luma_log2_weight_denom = > sh->sh_pred_weight_table.luma_log2_weight_denom, > + .delta_chroma_log2_weight_denom = > sh->sh_pred_weight_table.delta_chroma_log2_weight_denom, > + .num_l0_weights = > sh->sh_pred_weight_table.num_l0_weights, > + .num_l1_weights = > sh->sh_pred_weight_table.num_l1_weights, > + }, > + .sh_flags.bits = { > + .sh_alf_enabled_flag = > sh->sh_alf_enabled_flag, > + .sh_alf_cb_enabled_flag = > sh->sh_alf_cb_enabled_flag, > + .sh_alf_cr_enabled_flag = > sh->sh_alf_cr_enabled_flag, > + .sh_alf_cc_cb_enabled_flag = > sh->sh_alf_cc_cb_enabled_flag, > + .sh_alf_cc_cr_enabled_flag = > sh->sh_alf_cc_cr_enabled_flag, > + .sh_lmcs_used_flag = sh->sh_lmcs_used_flag, > + .sh_explicit_scaling_list_used_flag = > sh->sh_explicit_scaling_list_used_flag, > + .sh_cabac_init_flag = sh->sh_cabac_init_flag, > + .sh_collocated_from_l0_flag = > sh->sh_collocated_from_l0_flag, > + .sh_cu_chroma_qp_offset_enabled_flag = > sh->sh_cu_chroma_qp_offset_enabled_flag, > + .sh_sao_luma_used_flag = > sh->sh_sao_luma_used_flag, > + .sh_sao_chroma_used_flag = > sh->sh_sao_chroma_used_flag, > + .sh_deblocking_filter_disabled_flag = > sh->sh_deblocking_filter_disabled_flag, > + .sh_dep_quant_used_flag = > sh->sh_dep_quant_used_flag, > + .sh_sign_data_hiding_used_flag = > sh->sh_sign_data_hiding_used_flag, > + .sh_ts_residual_coding_disabled_flag = > 
sh->sh_ts_residual_coding_disabled_flag, > + }, > + }; > + > + memset(&slice_param->RefPicList, 0xFF, > sizeof(slice_param->RefPicList)); > + > + nb_list = (sh->sh_slice_type == VVC_SLICE_TYPE_B) ? > + 2 : (sh->sh_slice_type == VVC_SLICE_TYPE_I ? 0 : 1); > + for (int list_idx = 0; list_idx < nb_list; list_idx++) { > + RefPicList *rpl = &sc->rpl[list_idx]; > + > + for (i = 0; i < rpl->nb_refs; i++) > + slice_param->RefPicList[list_idx][i] = get_ref_pic_index(h, > rpl->ref[i]); > + } > + > + for (i = 0; i < 7; i++) > + slice_param->sh_alf_aps_id_luma[i] = sh->sh_alf_aps_id_luma[i]; > + > + for (i = 0; i < 15; i++) { > + slice_param->WPInfo.luma_weight_l0_flag[i] = > sh->sh_pred_weight_table.luma_weight_l0_flag[i]; > + slice_param->WPInfo.chroma_weight_l0_flag[i] = > sh->sh_pred_weight_table.chroma_weight_l0_flag[i]; > + slice_param->WPInfo.delta_luma_weight_l0[i] = > sh->sh_pred_weight_table.delta_luma_weight_l0[i]; > + slice_param->WPInfo.luma_offset_l0[i] = > sh->sh_pred_weight_table.luma_offset_l0[i]; > + slice_param->WPInfo.luma_weight_l1_flag[i] = > sh->sh_pred_weight_table.luma_weight_l1_flag[i]; > + slice_param->WPInfo.chroma_weight_l1_flag[i] = > sh->sh_pred_weight_table.chroma_weight_l1_flag[i]; > + slice_param->WPInfo.delta_luma_weight_l1[i] = > sh->sh_pred_weight_table.delta_luma_weight_l1[i]; > + slice_param->WPInfo.luma_offset_l1[i] = > sh->sh_pred_weight_table.luma_offset_l1[i]; > + } > + > + for (i = 0; i < 15; i++) { > + for (int j = 0; j < 2; j++) { > + slice_param->WPInfo.delta_chroma_weight_l0[i][j] = > sh->sh_pred_weight_table.delta_chroma_weight_l0[i][j]; > + slice_param->WPInfo.delta_chroma_offset_l0[i][j] = > sh->sh_pred_weight_table.delta_chroma_offset_l0[i][j]; > + slice_param->WPInfo.delta_chroma_weight_l1[i][j] = > sh->sh_pred_weight_table.delta_chroma_weight_l1[i][j]; > + slice_param->WPInfo.delta_chroma_offset_l1[i][j] = > sh->sh_pred_weight_table.delta_chroma_offset_l1[i][j]; > + } > + } > + > + err = 
ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic, > + &pic->slice_param, > + > sizeof(VASliceParameterBufferVVC), > + buffer, size); > + if (err) { > + ff_vaapi_decode_cancel(avctx, &pic->pic); > + return err; > + } > + > + return 0; > +} > + > +static int vaapi_vvc_end_frame(AVCodecContext *avctx) > +{ > + > + const VVCContext *h = avctx->priv_data; > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h->nb_fcs) % > h->nb_fcs]; > + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; > + int ret; > + > + ret = ff_vaapi_decode_issue(avctx, &pic->pic); > + if (ret < 0) > + goto fail; > + > + pic->decode_issued = 1; > + > + return 0; > + > +fail: > + ff_vaapi_decode_cancel(avctx, &pic->pic); > + return ret; > +} > + > +const FFHWAccel ff_vvc_vaapi_hwaccel = { > + .p.name = "vvc_vaapi", > + .p.type = AVMEDIA_TYPE_VIDEO, > + .p.id = AV_CODEC_ID_VVC, > + .p.pix_fmt = AV_PIX_FMT_VAAPI, > + .start_frame = &vaapi_vvc_start_frame, > + .end_frame = &vaapi_vvc_end_frame, > + .decode_slice = &vaapi_vvc_decode_slice, > + .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC), > + .init = &ff_vaapi_decode_init, > + .uninit = &ff_vaapi_decode_uninit, > + .frame_params = &ff_vaapi_common_frame_params, > + .priv_data_size = sizeof(VAAPIDecodeContext), > + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, > +}; > diff --git a/libavcodec/version.h b/libavcodec/version.h > index 06631ffa8c..7aa95fc3f1 100644 > --- a/libavcodec/version.h > +++ b/libavcodec/version.h > @@ -29,7 +29,7 @@ > > #include "version_major.h" > > -#define LIBAVCODEC_VERSION_MINOR 4 > +#define LIBAVCODEC_VERSION_MINOR 5 > #define LIBAVCODEC_VERSION_MICRO 100 > > #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c > index f2e269ce76..b204a0c73a 100644 > --- a/libavcodec/vvc/vvcdec.c > +++ b/libavcodec/vvc/vvcdec.c > @@ -29,6 +29,7 @@ > #include "libavutil/cpu.h" > #include "libavutil/thread.h" > > +#include "config_components.h" 
> #include "vvcdec.h" > #include "vvc_ctu.h" > #include "vvc_data.h" > @@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc, VVCContext > *s, VVCFrameContext *fc, > > static enum AVPixelFormat get_format(AVCodecContext *avctx, const VVCSPS > *sps) > { > -#define HWACCEL_MAX 0 > +#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL > > enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; > > switch (sps->pix_fmt) { > case AV_PIX_FMT_YUV420P: > +#if CONFIG_VVC_VAAPI_HWACCEL > + *fmt++ = AV_PIX_FMT_VAAPI; > +#endif > break; > case AV_PIX_FMT_YUV420P10: > +#if CONFIG_VVC_VAAPI_HWACCEL > + *fmt++ = AV_PIX_FMT_VAAPI; > +#endif > break; > } > > @@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = { > .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING | > FF_CODEC_CAP_INIT_CLEANUP | > FF_CODEC_CAP_AUTO_THREADS, > .p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles), > + .hw_configs = (const AVCodecHWConfigInternal *const []) { > +#if CONFIG_VVC_VAAPI_HWACCEL > + HWACCEL_VAAPI(vvc), > +#endif > + NULL > + }, > }; > -- > 2.25.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder 2024-04-02 12:48 ` Nuo Mi @ 2024-04-03 3:31 ` Wang, Fei W 2024-04-06 5:03 ` Nuo Mi 0 siblings, 1 reply; 14+ messages in thread From: Wang, Fei W @ 2024-04-03 3:31 UTC (permalink / raw) To: nuomi2021, ffmpeg-devel On Tue, 2024-04-02 at 20:48 +0800, Nuo Mi wrote: > > > On Thu, Mar 28, 2024 at 9:27 AM <fei.w.wang-at-intel.com@ffmpeg.org> > wrote: > > From: Fei Wang <fei.w.wang@intel.com> > > > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > > --- > > Changelog | 4 + > > configure | 3 + > > libavcodec/Makefile | 1 + > > libavcodec/hwaccels.h | 1 + > > libavcodec/vaapi_decode.c | 7 + > > libavcodec/vaapi_vvc.c | 657 > > ++++++++++++++++++++++++++++++++++++++ > > libavcodec/version.h | 2 +- > > libavcodec/vvc/vvcdec.c | 15 +- > > 8 files changed, 688 insertions(+), 2 deletions(-) > > create mode 100644 libavcodec/vaapi_vvc.c > > > > diff --git a/Changelog b/Changelog > > index e83a00e35c..3108e65558 100644 > > --- a/Changelog > > +++ b/Changelog > > @@ -1,6 +1,10 @@ > > Entries are sorted chronologically from oldest to youngest within > > each release, > > releases are sorted from youngest to oldest. 
> > > > +version <next>: > > +- VVC VAAPI decoder > > + > > + > > version 7.0: > > - DXV DXT1 encoder > > - LEAD MCMP decoder > > diff --git a/configure b/configure > > index 2a1d22310b..d902c9adc8 100755 > > --- a/configure > > +++ b/configure > > @@ -3258,6 +3258,8 @@ vp9_vdpau_hwaccel_deps="vdpau > > VdpPictureInfoVP9" > > vp9_vdpau_hwaccel_select="vp9_decoder" > > vp9_videotoolbox_hwaccel_deps="videotoolbox" > > vp9_videotoolbox_hwaccel_select="vp9_decoder" > > +vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC" > > +vvc_vaapi_hwaccel_select="vvc_decoder" > > wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel" > > wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel" > > wmv3_d3d12va_hwaccel_select="vc1_d3d12va_hwaccel" > > @@ -7250,6 +7252,7 @@ if enabled vaapi; then > > check_cpp_condition vaapi_1 "va/va.h" "VA_CHECK_VERSION(1, 0, > > 0)" > > > > check_type "va/va.h va/va_dec_hevc.h" > > "VAPictureParameterBufferHEVC" > > + check_type "va/va.h va/va_dec_vvc.h" > > "VAPictureParameterBufferVVC" > > check_struct "va/va.h" "VADecPictureParameterBufferVP9" > > bit_depth > > check_struct "va/va.h" "VADecPictureParameterBufferAV1" > > bit_depth_idx > > check_type "va/va.h va/va_vpp.h" > > "VAProcFilterParameterBufferHDRToneMapping" > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > > index 9ce6d445c1..343b037636 100644 > > --- a/libavcodec/Makefile > > +++ b/libavcodec/Makefile > > @@ -1054,6 +1054,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += > > vaapi_vp9.o > > OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o > > OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o > > OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o > > +OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o > > > > # Objects duplicated from other libraries for shared builds > > SHLIBOBJS += log2_tab.o reverse.o > > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h > > index 5171e4c7d7..88d6b9a9b5 100644 > > --- a/libavcodec/hwaccels.h > > +++ b/libavcodec/hwaccels.h > > @@ -82,6 
+82,7 @@ extern const struct FFHWAccel > > ff_vp9_nvdec_hwaccel; > > extern const struct FFHWAccel ff_vp9_vaapi_hwaccel; > > extern const struct FFHWAccel ff_vp9_vdpau_hwaccel; > > extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel; > > +extern const struct FFHWAccel ff_vvc_vaapi_hwaccel; > > extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel; > > extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel; > > extern const struct FFHWAccel ff_wmv3_d3d12va_hwaccel; > > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c > > index 1b1972a2a9..ceeb1f1a12 100644 > > --- a/libavcodec/vaapi_decode.c > > +++ b/libavcodec/vaapi_decode.c > > @@ -455,6 +455,9 @@ static const struct { > > MAP(AV1, AV1_MAIN, AV1Profile0), > > MAP(AV1, AV1_HIGH, AV1Profile1), > > #endif > > +#if VA_CHECK_VERSION(1, 22, 0) > > + MAP(H266, VVC_MAIN_10, VVCMain10), > > +#endif > > > > #undef MAP > > }; > > @@ -627,6 +630,10 @@ static int > > vaapi_decode_make_config(AVCodecContext *avctx, > > case AV_CODEC_ID_VP8: > > frames->initial_pool_size += 3; > > break; > > + case AV_CODEC_ID_H266: > > + // Add additional 16 for maximum 16 frames delay in > > vvc native decode. > > + frames->initial_pool_size += 32; > > One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames > amount to approximately 6GB. Can we dynamically allocate the buffer > pool? This is being handled in another thread: https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=11316 > > The software decoder requires a delay of 16 frames to ensure full > utilization of CPUs. In the future, we may consider increasing this > to 32 or even 64 frames. > However, for hardware decoding, given that all processing occurs on > the GPU, we do not require any delay. The delay avoids having to sync a hardware task immediately after it is submitted, which would otherwise force the hardware to switch tasks frequently and reduce performance. If the number is going to increase, I'd prefer to make it an option and use a different default value for hardware than for software. 
Thanks Fei > > > + break; > > default: > > frames->initial_pool_size += 2; > > } > > diff --git a/libavcodec/vaapi_vvc.c b/libavcodec/vaapi_vvc.c > > new file mode 100644 > > index 0000000000..6141005688 > > --- /dev/null > > +++ b/libavcodec/vaapi_vvc.c > > @@ -0,0 +1,657 @@ > > +/* > > + * VVC HW decode acceleration through VA API > > + * > > + * Copyright (c) 2024 Intel Corporation > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later > > version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General > > Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > > 02110-1301 USA > > + */ > > + > > +#include <va/va.h> > > +#include <va/va_dec_vvc.h> > > + > > +#include "vvc/vvcdec.h" > > +#include "vvc/vvc_refs.h" > > +#include "hwaccel_internal.h" > > +#include "vaapi_decode.h" > > + > > +typedef struct VAAPIDecodePictureVVC { > > + VAAPIDecodePicture pic; > > + VAPictureParameterBufferVVC pic_param; > > + VASliceParameterBufferVVC slice_param; > > + int decode_issued; > > +} VAAPIDecodePictureVVC; > > + > > +static void init_vaapi_pic(VAPictureVVC *va_pic) > > +{ > > + va_pic->picture_id = VA_INVALID_ID; > > + va_pic->flags = VA_PICTURE_VVC_INVALID; > > + va_pic->pic_order_cnt = 0; > > +} > > + > > +static void fill_vaapi_pic(VAPictureVVC *va_pic, const VVCFrame > > *pic) > > +{ > > + va_pic->picture_id = 
ff_vaapi_get_surface_id(pic->frame); > > + va_pic->pic_order_cnt = pic->poc; > > + va_pic->flags = 0; > > + > > + if (pic->flags & VVC_FRAME_FLAG_LONG_REF) > > + va_pic->flags |= VA_PICTURE_VVC_LONG_TERM_REFERENCE; > > +} > > + > > +static void fill_vaapi_reference_frames(const VVCFrameContext *h, > > VAPictureParameterBufferVVC *pp) > > +{ > > + const VVCFrame *current_picture = h->ref; > > + int i, j; > > + > > + for (i = 0, j = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); > > i++) { > > + const VVCFrame *frame = NULL; > > + > > + while (!frame && j < FF_ARRAY_ELEMS(h->DPB)) { > > + if ((&h->DPB[j] != current_picture ) && > > + (h->DPB[j].flags & (VVC_FRAME_FLAG_LONG_REF | > > VVC_FRAME_FLAG_SHORT_REF))) > > + frame = &h->DPB[j]; > > + j++; > > + } > > + > > + init_vaapi_pic(&pp->ReferenceFrames[i]); > > + > > + if (frame) { > > + VAAPIDecodePictureVVC *pic; > > + fill_vaapi_pic(&pp->ReferenceFrames[i], frame); > > + pic = frame->hwaccel_picture_private; > > + if (!pic->decode_issued) > > + pp->ReferenceFrames[i].flags |= > > VA_PICTURE_VVC_UNAVAILABLE_REFERENCE; > > + } > > + } > > +} > > + > > +static int vaapi_vvc_start_frame(AVCodecContext *avctx, > > + av_unused const uint8_t *buffer, > > + av_unused uint32_t size) > > +{ > > + const VVCContext *h = avctx->priv_data; > > + VVCFrameContext *fc = &h->fcs[(h->nb_frames > > + h->nb_fcs) % h->nb_fcs]; > > + const H266RawSPS *sps = fc->ps.sps->r; > > + const H266RawPPS *pps = fc->ps.pps->r; > > + const H266RawPictureHeader *ph = fc->ps.ph.r; > > + VAAPIDecodePictureVVC *pic = fc->ref- > > >hwaccel_picture_private; > > + VAPictureParameterBufferVVC *pic_param = &pic->pic_param; > > + uint16_t tile_dim, exp_slice_height_in_ctus[VVC_MAX_SLICES] = > > {0}; > > + int i, j, k, err; > > + > > + pic->pic.output_surface = ff_vaapi_get_surface_id(fc->ref- > > >frame); > > + > > + *pic_param = (VAPictureParameterBufferVVC) { > > + .pps_pic_width_in_luma_samples = pps- > > >pps_pic_width_in_luma_samples, > > + 
.pps_pic_height_in_luma_samples = pps- > > >pps_pic_height_in_luma_samples, > > + .sps_num_subpics_minus1 = sps- > > >sps_num_subpics_minus1, > > + .sps_chroma_format_idc = sps- > > >sps_chroma_format_idc, > > + .sps_bitdepth_minus8 = sps- > > >sps_bitdepth_minus8, > > + .sps_log2_ctu_size_minus5 = sps- > > >sps_log2_ctu_size_minus5, > > + .sps_log2_min_luma_coding_block_size_minus2 = sps- > > >sps_log2_min_luma_coding_block_size_minus2, > > + .sps_log2_transform_skip_max_size_minus2 = sps- > > >sps_log2_transform_skip_max_size_minus2, > > + .sps_six_minus_max_num_merge_cand = sps- > > >sps_six_minus_max_num_merge_cand, > > + .sps_five_minus_max_num_subblock_merge_cand = sps- > > >sps_five_minus_max_num_subblock_merge_cand, > > + .sps_max_num_merge_cand_minus_max_num_gpm_cand = sps- > > >sps_max_num_merge_cand_minus_max_num_gpm_cand, > > + .sps_log2_parallel_merge_level_minus2 = sps- > > >sps_log2_parallel_merge_level_minus2, > > + .sps_min_qp_prime_ts = sps- > > >sps_min_qp_prime_ts, > > + .sps_six_minus_max_num_ibc_merge_cand = sps- > > >sps_six_minus_max_num_ibc_merge_cand, > > + .sps_num_ladf_intervals_minus2 = sps- > > >sps_num_ladf_intervals_minus2, > > + .sps_ladf_lowest_interval_qp_offset = sps- > > >sps_ladf_lowest_interval_qp_offset, > > + .sps_flags.bits = { > > + .sps_subpic_info_present_flag > > = sps->sps_subpic_info_present_flag, > > + .sps_independent_subpics_flag > > = sps->sps_independent_subpics_flag, > > + .sps_subpic_same_size_flag > > = sps->sps_subpic_same_size_flag, > > + .sps_entropy_coding_sync_enabled_flag > > = sps->sps_entropy_coding_sync_enabled_flag, > > + .sps_qtbtt_dual_tree_intra_flag > > = sps->sps_qtbtt_dual_tree_intra_flag, > > + .sps_max_luma_transform_size_64_flag > > = sps->sps_max_luma_transform_size_64_flag, > > + .sps_transform_skip_enabled_flag > > = sps->sps_transform_skip_enabled_flag, > > + .sps_bdpcm_enabled_flag > > = sps->sps_bdpcm_enabled_flag, > > + .sps_mts_enabled_flag > > = sps->sps_mts_enabled_flag, > > + 
.sps_explicit_mts_intra_enabled_flag > > = sps->sps_explicit_mts_intra_enabled_flag, > > + .sps_explicit_mts_inter_enabled_flag > > = sps->sps_explicit_mts_inter_enabled_flag, > > + .sps_lfnst_enabled_flag > > = sps->sps_lfnst_enabled_flag, > > + .sps_joint_cbcr_enabled_flag > > = sps->sps_joint_cbcr_enabled_flag, > > + .sps_same_qp_table_for_chroma_flag > > = sps->sps_same_qp_table_for_chroma_flag, > > + .sps_sao_enabled_flag > > = sps->sps_sao_enabled_flag, > > + .sps_alf_enabled_flag > > = sps->sps_alf_enabled_flag, > > + .sps_ccalf_enabled_flag > > = sps->sps_alf_enabled_flag, > > + .sps_lmcs_enabled_flag > > = sps->sps_lmcs_enabled_flag, > > + .sps_sbtmvp_enabled_flag > > = sps->sps_sbtmvp_enabled_flag, > > + .sps_amvr_enabled_flag > > = sps->sps_amvr_enabled_flag, > > + .sps_smvd_enabled_flag > > = sps->sps_smvd_enabled_flag, > > + .sps_mmvd_enabled_flag > > = sps->sps_mmvd_enabled_flag, > > + .sps_sbt_enabled_flag > > = sps->sps_sbt_enabled_flag, > > + .sps_affine_enabled_flag > > = sps->sps_affine_enabled_flag, > > + .sps_6param_affine_enabled_flag > > = sps->sps_6param_affine_enabled_flag, > > + .sps_affine_amvr_enabled_flag > > = sps->sps_affine_amvr_enabled_flag, > > + .sps_affine_prof_enabled_flag > > = sps->sps_affine_prof_enabled_flag, > > + .sps_bcw_enabled_flag > > = sps->sps_bcw_enabled_flag, > > + .sps_ciip_enabled_flag > > = sps->sps_ciip_enabled_flag, > > + .sps_gpm_enabled_flag > > = sps->sps_gpm_enabled_flag, > > + .sps_isp_enabled_flag > > = sps->sps_isp_enabled_flag, > > + .sps_mrl_enabled_flag > > = sps->sps_mrl_enabled_flag, > > + .sps_mip_enabled_flag > > = sps->sps_mip_enabled_flag, > > + .sps_cclm_enabled_flag > > = sps->sps_cclm_enabled_flag, > > + .sps_chroma_horizontal_collocated_flag > > = sps->sps_chroma_horizontal_collocated_flag, > > + .sps_chroma_vertical_collocated_flag > > = sps->sps_chroma_vertical_collocated_flag, > > + .sps_palette_enabled_flag > > = sps->sps_palette_enabled_flag, > > + .sps_act_enabled_flag > > = 
sps->sps_act_enabled_flag, > > + .sps_ibc_enabled_flag > > = sps->sps_ibc_enabled_flag, > > + .sps_ladf_enabled_flag > > = sps->sps_ladf_enabled_flag, > > + .sps_explicit_scaling_list_enabled_flag > > = sps->sps_explicit_scaling_list_enabled_flag, > > + .sps_scaling_matrix_for_lfnst_disabled_flag > > = sps->sps_scaling_matrix_for_lfnst_disabled_flag, > > + > > .sps_scaling_matrix_for_alternative_colour_space_disabled_flag = > > sps->sps_scaling_matrix_for_alternative_colour_space_disabled_flag, > > + .sps_scaling_matrix_designated_colour_space_flag > > = sps->sps_scaling_matrix_designated_colour_space_flag, > > + .sps_virtual_boundaries_enabled_flag > > = sps->sps_virtual_boundaries_enabled_flag, > > + .sps_virtual_boundaries_present_flag > > = sps->sps_virtual_boundaries_present_flag, > > + }, > > + .NumVerVirtualBoundaries = sps- > > >sps_virtual_boundaries_present_flag ? > > + sps- > > >sps_num_ver_virtual_boundaries : > > + ph- > > >ph_num_ver_virtual_boundaries, > > + .NumHorVirtualBoundaries = sps- > > >sps_virtual_boundaries_present_flag ? 
> > + sps- > > >sps_num_hor_virtual_boundaries : > > + ph- > > >ph_num_hor_virtual_boundaries, > > + .pps_scaling_win_left_offset = pps- > > >pps_scaling_win_left_offset, > > + .pps_scaling_win_right_offset = pps- > > >pps_scaling_win_right_offset, > > + .pps_scaling_win_top_offset = pps- > > >pps_scaling_win_top_offset, > > + .pps_scaling_win_bottom_offset = pps- > > >pps_scaling_win_bottom_offset, > > + .pps_num_exp_tile_columns_minus1 = pps- > > >pps_num_exp_tile_columns_minus1, > > + .pps_num_exp_tile_rows_minus1 = pps- > > >pps_num_exp_tile_rows_minus1, > > + .pps_num_slices_in_pic_minus1 = pps- > > >pps_num_slices_in_pic_minus1, > > + .pps_pic_width_minus_wraparound_offset = pps- > > >pps_pic_width_minus_wraparound_offset, > > + .pps_cb_qp_offset = pps- > > >pps_cb_qp_offset, > > + .pps_cr_qp_offset = pps- > > >pps_cr_qp_offset, > > + .pps_joint_cbcr_qp_offset_value = pps- > > >pps_joint_cbcr_qp_offset_value, > > + .pps_chroma_qp_offset_list_len_minus1 = pps- > > >pps_chroma_qp_offset_list_len_minus1, > > + .pps_flags.bits = { > > + .pps_loop_filter_across_tiles_enabled_flag = pps- > > >pps_loop_filter_across_tiles_enabled_flag, > > + .pps_rect_slice_flag = pps- > > >pps_rect_slice_flag, > > + .pps_single_slice_per_subpic_flag = pps- > > >pps_single_slice_per_subpic_flag, > > + .pps_loop_filter_across_slices_enabled_flag = pps- > > >pps_loop_filter_across_slices_enabled_flag, > > + .pps_weighted_pred_flag = pps- > > >pps_weighted_pred_flag, > > + .pps_weighted_bipred_flag = pps- > > >pps_weighted_bipred_flag, > > + .pps_ref_wraparound_enabled_flag = pps- > > >pps_ref_wraparound_enabled_flag, > > + .pps_cu_qp_delta_enabled_flag = pps- > > >pps_cu_qp_delta_enabled_flag, > > + .pps_cu_chroma_qp_offset_list_enabled_flag = pps- > > >pps_cu_chroma_qp_offset_list_enabled_flag, > > + .pps_deblocking_filter_override_enabled_flag = pps- > > >pps_deblocking_filter_override_enabled_flag, > > + .pps_deblocking_filter_disabled_flag = pps- > > 
>pps_deblocking_filter_disabled_flag, > > + .pps_dbf_info_in_ph_flag = pps- > > >pps_dbf_info_in_ph_flag, > > + .pps_sao_info_in_ph_flag = pps- > > >pps_sao_info_in_ph_flag, > > + .pps_alf_info_in_ph_flag = pps- > > >pps_alf_info_in_ph_flag, > > + }, > > + .ph_lmcs_aps_id = ph- > > >ph_lmcs_aps_id, > > + .ph_scaling_list_aps_id = ph- > > >ph_scaling_list_aps_id, > > + .ph_log2_diff_min_qt_min_cb_intra_slice_luma = ph- > > >ph_log2_diff_min_qt_min_cb_intra_slice_luma, > > + .ph_max_mtt_hierarchy_depth_intra_slice_luma = ph- > > >ph_max_mtt_hierarchy_depth_intra_slice_luma, > > + .ph_log2_diff_max_bt_min_qt_intra_slice_luma = ph- > > >ph_log2_diff_max_bt_min_qt_intra_slice_luma, > > + .ph_log2_diff_max_tt_min_qt_intra_slice_luma = ph- > > >ph_log2_diff_max_tt_min_qt_intra_slice_luma, > > + .ph_log2_diff_min_qt_min_cb_intra_slice_chroma = ph- > > >ph_log2_diff_min_qt_min_cb_intra_slice_chroma, > > + .ph_max_mtt_hierarchy_depth_intra_slice_chroma = ph- > > >ph_max_mtt_hierarchy_depth_intra_slice_chroma, > > + .ph_log2_diff_max_bt_min_qt_intra_slice_chroma = ph- > > >ph_log2_diff_max_bt_min_qt_intra_slice_chroma, > > + .ph_log2_diff_max_tt_min_qt_intra_slice_chroma = ph- > > >ph_log2_diff_max_tt_min_qt_intra_slice_chroma, > > + .ph_cu_qp_delta_subdiv_intra_slice = ph- > > >ph_cu_qp_delta_subdiv_intra_slice, > > + .ph_cu_chroma_qp_offset_subdiv_intra_slice = ph- > > >ph_cu_chroma_qp_offset_subdiv_intra_slice, > > + .ph_log2_diff_min_qt_min_cb_inter_slice = ph- > > >ph_log2_diff_min_qt_min_cb_inter_slice, > > + .ph_max_mtt_hierarchy_depth_inter_slice = ph- > > >ph_max_mtt_hierarchy_depth_inter_slice, > > + .ph_log2_diff_max_bt_min_qt_inter_slice = ph- > > >ph_log2_diff_max_bt_min_qt_inter_slice, > > + .ph_log2_diff_max_tt_min_qt_inter_slice = ph- > > >ph_log2_diff_max_tt_min_qt_inter_slice, > > + .ph_cu_qp_delta_subdiv_inter_slice = ph- > > >ph_cu_qp_delta_subdiv_inter_slice, > > + .ph_cu_chroma_qp_offset_subdiv_inter_slice = ph- > > 
>ph_cu_chroma_qp_offset_subdiv_inter_slice, > > + .ph_flags.bits= { > > + .ph_non_ref_pic_flag = ph- > > >ph_non_ref_pic_flag, > > + .ph_alf_enabled_flag = ph- > > >ph_alf_enabled_flag, > > + .ph_alf_cb_enabled_flag = ph- > > >ph_alf_cb_enabled_flag, > > + .ph_alf_cr_enabled_flag = ph- > > >ph_alf_cr_enabled_flag, > > + .ph_alf_cc_cb_enabled_flag = ph- > > >ph_alf_cc_cb_enabled_flag, > > + .ph_alf_cc_cr_enabled_flag = ph- > > >ph_alf_cc_cr_enabled_flag, > > + .ph_lmcs_enabled_flag = ph- > > >ph_lmcs_enabled_flag, > > + .ph_chroma_residual_scale_flag = ph- > > >ph_chroma_residual_scale_flag, > > + .ph_explicit_scaling_list_enabled_flag = ph- > > >ph_explicit_scaling_list_enabled_flag, > > + .ph_virtual_boundaries_present_flag = ph- > > >ph_virtual_boundaries_present_flag, > > + .ph_temporal_mvp_enabled_flag = ph- > > >ph_temporal_mvp_enabled_flag, > > + .ph_mmvd_fullpel_only_flag = ph- > > >ph_mmvd_fullpel_only_flag, > > + .ph_mvd_l1_zero_flag = ph- > > >ph_mvd_l1_zero_flag, > > + .ph_bdof_disabled_flag = ph- > > >ph_bdof_disabled_flag, > > + .ph_dmvr_disabled_flag = ph- > > >ph_dmvr_disabled_flag, > > + .ph_prof_disabled_flag = ph- > > >ph_prof_disabled_flag, > > + .ph_joint_cbcr_sign_flag = ph- > > >ph_joint_cbcr_sign_flag, > > + .ph_sao_luma_enabled_flag = ph- > > >ph_sao_luma_enabled_flag, > > + .ph_sao_chroma_enabled_flag = ph- > > >ph_sao_chroma_enabled_flag, > > + .ph_deblocking_filter_disabled_flag = ph- > > >ph_deblocking_filter_disabled_flag, > > + }, > > + .PicMiscFlags.fields = { > > + .IntraPicFlag = pps->pps_mixed_nalu_types_in_pic_flag > > ? 0 : IS_IRAP(h) ? 
1 : 0, > > + } > > + }; > > + > > + fill_vaapi_pic(&pic_param->CurrPic, fc->ref); > > + fill_vaapi_reference_frames(fc, pic_param); > > + > > + for (i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) > > + for (j = 0; j < VVC_MAX_POINTS_IN_QP_TABLE; j++) > > + pic_param->ChromaQpTable[i][j] = fc->ps.sps- > > >chroma_qp_table[i][j]; > > + for (i = 0; i < 4; i++) { > > + pic_param->sps_ladf_qp_offset[i] = sps- > > >sps_ladf_qp_offset[i]; > > + pic_param->sps_ladf_delta_threshold_minus1[i] = sps- > > >sps_ladf_delta_threshold_minus1[i]; > > + } > > + > > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? > > sps->sps_num_ver_virtual_boundaries : ph- > > >ph_num_ver_virtual_boundaries); i++) { > > + pic_param->VirtualBoundaryPosX[i] = (sps- > > >sps_virtual_boundaries_present_flag ? > > + (sps- > > >sps_virtual_boundary_pos_x_minus1[ i ] + 1) : > > + (ph- > > >ph_virtual_boundary_pos_x_minus1[i] + 1)) * 8; > > + } > > + > > + for (i = 0; i < (sps->sps_virtual_boundaries_present_flag ? > > sps->sps_num_hor_virtual_boundaries : ph- > > >ph_num_hor_virtual_boundaries); i++) { > > + pic_param->VirtualBoundaryPosY[i] = (sps- > > >sps_virtual_boundaries_present_flag ? 
> > + (sps- > > >sps_virtual_boundary_pos_y_minus1[ i ] + 1) : > > + (ph- > > >ph_virtual_boundary_pos_y_minus1[i] + 1)) * 8; > > + } > > + > > + for (i = 0; i < 6; i++) { > > + pic_param->pps_cb_qp_offset_list[i] = pps- > > >pps_cb_qp_offset_list[i]; > > + pic_param->pps_cr_qp_offset_list[i] = pps- > > >pps_cr_qp_offset_list[i]; > > + pic_param->pps_joint_cbcr_qp_offset_list[i] = pps- > > >pps_joint_cbcr_qp_offset_list[i]; > > + } > > + > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > > + > > VAPictureParameterBufferType, > > + &pic->pic_param, > > sizeof(VAPictureParameterBufferVVC)); > > + if (err < 0) > > + goto fail; > > + > > + for (i = 0; i <= sps->sps_num_subpics_minus1 && sps- > > >sps_subpic_info_present_flag; i++) { > > + VASubPicVVC subpic_param = { > > + .sps_subpic_ctu_top_left_x = sps- > > >sps_subpic_ctu_top_left_x[i], > > + .sps_subpic_ctu_top_left_y = sps- > > >sps_subpic_ctu_top_left_y[i], > > + .sps_subpic_width_minus1 = sps- > > >sps_subpic_width_minus1[i], > > + .sps_subpic_height_minus1 = sps- > > >sps_subpic_height_minus1[i], > > + .SubpicIdVal = pps->sub_pic_id_val[i], > > + .subpic_flags.bits = { > > + .sps_subpic_treated_as_pic_flag = sps- > > >sps_subpic_treated_as_pic_flag[i], > > + .sps_loop_filter_across_subpic_enabled_flag = sps- > > >sps_loop_filter_across_subpic_enabled_flag[i], > > + } > > + }; > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > > + > > VASubPicBufferType, > > + &subpic_param, > > sizeof(VASubPicVVC)); > > + if (err < 0) > > + goto fail; > > + } > > + > > + for (i = 0; i < VVC_MAX_ALF_COUNT; i++) { > > + const VVCALF *alf_list = h->ps.alf_list[i]; > > + if (alf_list) { > > + const H266RawAPS *alf = alf_list->r; > > + VAAlfDataVVC alf_param = { > > + .aps_adaptation_parameter_set_id = i, > > + .alf_luma_num_filters_signalled_minus1 = alf- > > >alf_luma_num_filters_signalled_minus1, > > + .alf_chroma_num_alt_filters_minus1 = alf- > > >alf_chroma_num_alt_filters_minus1, > > + 
.alf_cc_cb_filters_signalled_minus1 = alf- > > >alf_cc_cb_filters_signalled_minus1, > > + .alf_cc_cr_filters_signalled_minus1 = alf- > > >alf_cc_cr_filters_signalled_minus1, > > + .alf_flags.bits = { > > + .alf_luma_filter_signal_flag = alf- > > >alf_luma_filter_signal_flag, > > + .alf_chroma_filter_signal_flag = alf- > > >alf_chroma_filter_signal_flag, > > + .alf_cc_cb_filter_signal_flag = alf- > > >alf_cc_cb_filter_signal_flag, > > + .alf_cc_cr_filter_signal_flag = alf- > > >alf_cc_cr_filter_signal_flag, > > + .alf_luma_clip_flag = alf- > > >alf_luma_clip_flag, > > + .alf_chroma_clip_flag = alf- > > >alf_chroma_clip_flag, > > + } > > + }; > > + > > + for (j = 0; j < 25; j++) > > + alf_param.alf_luma_coeff_delta_idx[j] = alf- > > >alf_luma_coeff_delta_idx[j]; > > + > > + for (j = 0; j < 25; j++) { > > + for (k = 0; k < 12; k++) { > > + alf_param.filtCoeff[j][k] = alf- > > >alf_luma_coeff_abs[j][k] * (1 - 2 * alf- > > >alf_luma_coeff_sign[j][k]); > > + alf_param.alf_luma_clip_idx[j][k] = alf- > > >alf_luma_clip_idx[j][k]; > > + } > > + } > > + > > + for (j = 0; j < 8; j++) { > > + for (k = 0; k < 6; k++) { > > + alf_param.AlfCoeffC[j][k] = alf- > > >alf_chroma_coeff_abs[j][k] * (1 - 2 * alf- > > >alf_chroma_coeff_sign[j][k]); > > + alf_param.alf_chroma_clip_idx[j][k] = alf- > > >alf_chroma_clip_idx[j][k]; > > + } > > + } > > + > > + for (j = 0; j < 4; j++) { > > + for (k = 0; k < 7; k++) { > > + if (alf->alf_cc_cb_mapped_coeff_abs[j][k]) > > + alf_param.CcAlfApsCoeffCb[j][k] = (1 - 2 * > > alf->alf_cc_cb_coeff_sign[j][k]) * (1 << (alf- > > >alf_cc_cb_mapped_coeff_abs[j][k] - 1)); > > + if (alf->alf_cc_cr_mapped_coeff_abs[j][k]) > > + alf_param.CcAlfApsCoeffCr[j][k] = (1 - 2 * > > alf->alf_cc_cr_coeff_sign[j][k]) * (1 << (alf- > > >alf_cc_cr_mapped_coeff_abs[j][k] - 1)); > > + } > > + } > > + > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic- > > >pic, > > + > > VAAlfBufferType, > > + &alf_param, > > sizeof(VAAlfDataVVC)); > > + if (err < 0) > > + goto fail; 
> > + } > > + } > > + > > + for (i = 0; i < VVC_MAX_LMCS_COUNT; i++) { > > + const H266RawAPS *lmcs = h->ps.lmcs_list[i]; > > + if (lmcs) { > > + VALmcsDataVVC lmcs_param = { > > + .aps_adaptation_parameter_set_id = i, > > + .lmcs_min_bin_idx = lmcs- > > >lmcs_min_bin_idx, > > + .lmcs_delta_max_bin_idx = lmcs- > > >lmcs_delta_max_bin_idx, > > + .lmcsDeltaCrs = (1 - 2 * lmcs- > > >lmcs_delta_sign_crs_flag) * lmcs->lmcs_delta_abs_crs, > > + }; > > + > > + for (j = lmcs->lmcs_min_bin_idx; j <= 15 - lmcs- > > >lmcs_delta_max_bin_idx; j++) > > + lmcs_param.lmcsDeltaCW[j] = (1 - 2 * lmcs- > > >lmcs_delta_sign_cw_flag[j]) * lmcs->lmcs_delta_abs_cw[j]; > > + > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic- > > >pic, > > + > > VALmcsBufferType, > > + &lmcs_param, > > sizeof(VALmcsDataVVC)); > > + if (err < 0) > > + goto fail; > > + } > > + } > > + > > + for (i = 0; i < VVC_MAX_SL_COUNT; i++) { > > + const VVCScalingList *sl = h->ps.scaling_list[i]; > > + if (sl) { > > + int l; > > + > > + VAScalingListVVC sl_param = { > > + .aps_adaptation_parameter_set_id = i, > > + }; > > + > > + for (j = 0; j < 14; j++) > > + sl_param.ScalingMatrixDCRec[j] = sl- > > >scaling_matrix_dc_rec[j]; > > + > > + for (j = 0; j < 2; j++) > > + for (k = 0; k < 2; k++) > > + for (l = 0; l < 2; l++) > > + sl_param.ScalingMatrixRec2x2[j][k][l] = > > sl->scaling_matrix_rec[j][l * 2 + k]; > > + > > + for (j = 2; j < 8; j++) > > + for (k = 0; k < 4; k++) > > + for (l = 0; l < 4; l++) > > + sl_param.ScalingMatrixRec4x4[j - 2][k][l] > > = sl->scaling_matrix_rec[j][l * 4 + k]; > > + > > + for (j = 8; j < 28; j++) > > + for (k = 0; k < 8; k++) > > + for (l = 0; l < 8; l++) > > + sl_param.ScalingMatrixRec8x8[j - 8][k][l] > > = sl->scaling_matrix_rec[j][l * 8 + k]; > > + > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic- > > >pic, > > + > > VAIQMatrixBufferType, > > + &sl_param, > > sizeof(VAScalingListVVC)); > > + if (err < 0) > > + goto fail; > > + } > > + } > > + > > + for (i = 0; i <= 
pps->pps_num_exp_tile_columns_minus1; i++) { > > + tile_dim = pps->pps_tile_column_width_minus1[i]; > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > > + VATileBufferType, > > + &tile_dim, > > sizeof(tile_dim)); > > + if (err < 0) > > + goto fail; > > + } > > + > > + for (i = 0; i <= pps->pps_num_exp_tile_rows_minus1; i++) { > > + tile_dim = pps->pps_tile_row_height_minus1[i]; > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic->pic, > > + VATileBufferType, > > + &tile_dim, > > sizeof(tile_dim)); > > + if (err < 0) > > + goto fail; > > + } > > + > > + if (!pps->pps_no_pic_partition_flag && pps- > > >pps_rect_slice_flag && !pps->pps_single_slice_per_subpic_flag) { > > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { > > + for (j = 0; j < pps->pps_num_exp_slices_in_tile[i]; > > j++) { > > + exp_slice_height_in_ctus[i + j] = pps- > > >pps_exp_slice_height_in_ctus_minus1[i][j] + 1; > > + } > > + } > > + for (i = 0; i <= pps->pps_num_slices_in_pic_minus1; i++) { > > + VASliceStructVVC ss_param = { > > + .SliceTopLeftTileIdx = pps- > > >slice_top_left_tile_idx[i], > > + .pps_slice_width_in_tiles_minus1 = pps- > > >pps_slice_width_in_tiles_minus1[i], > > + .pps_slice_height_in_tiles_minus1 = pps- > > >pps_slice_height_in_tiles_minus1[i], > > + }; > > + > > + if (pps->pps_slice_width_in_tiles_minus1[i] > 0 || > > pps->pps_slice_height_in_tiles_minus1[i] > 0) > > + ss_param.pps_exp_slice_height_in_ctus_minus1 = 0; > > + else { > > + if (pps->num_slices_in_tile[i] == 1) > > + ss_param.pps_exp_slice_height_in_ctus_minus1 = > > pps->row_height_val[pps->slice_top_left_tile_idx[i] / pps- > > >num_tile_columns] - 1; > > + else if (exp_slice_height_in_ctus[i]) > > + ss_param.pps_exp_slice_height_in_ctus_minus1 = > > exp_slice_height_in_ctus[i] - 1; > > + else > > + continue; > > + } > > + > > + err = ff_vaapi_decode_make_param_buffer(avctx, &pic- > > >pic, > > + > > VASliceStructBufferType, > > + &ss_param, > > sizeof(VASliceStructVVC)); > > + if 
(err < 0) > > + goto fail; > > + } > > + } > > + > > + return 0; > > + > > +fail: > > + ff_vaapi_decode_cancel(avctx, &pic->pic); > > + return err; > > +} > > + > > +static uint8_t get_ref_pic_index(const VVCContext *h, const > > VVCFrame *frame) > > +{ > > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h- > > >nb_fcs) % h->nb_fcs]; > > + VAAPIDecodePictureVVC *pic = fc->ref- > > >hwaccel_picture_private; > > + VAPictureParameterBufferVVC *pp = (VAPictureParameterBufferVVC > > *)&pic->pic_param; > > + uint8_t i; > > + > > + if (!frame) > > + return 0xFF; > > + > > + for (i = 0; i < FF_ARRAY_ELEMS(pp->ReferenceFrames); i++) { > > + VASurfaceID pid = pp->ReferenceFrames[i].picture_id; > > + int poc = pp->ReferenceFrames[i].pic_order_cnt; > > + if (pid != VA_INVALID_ID && pid == > > ff_vaapi_get_surface_id(frame->frame) && poc == frame->poc) > > + return i; > > + } > > + > > + return 0xFF; > > +} > > + > > +static int get_slice_data_offset(const uint8_t *buffer, uint32_t > > size, const SliceContext* sc) > > +{ > > + const H266RawSlice *slice = sc->ref; > > + int num_identical_bytes = slice->data_size < 32 ? 
slice- > > >data_size : 32; > > + > > + for (int i = 0; i < size; i++) { > > + int skip_bytes = 0; > > + if (i >=2 && buffer[i] == 0x03 && !buffer[i - 1] && > > !buffer[i - 2]) > > + continue; > > + > > + for (int j = 0; j < num_identical_bytes; j++) { > > + if (i >= 2 && buffer[i + j + skip_bytes] == 0x03 && > > !buffer[i + j + skip_bytes - 1] && !buffer[i + j + skip_bytes - 2]) > > + skip_bytes++; > > + > > + if (buffer[i + j + skip_bytes] != slice->data[j]) > > + break; > > + > > + if (j + 1 == num_identical_bytes) > > + return i; > > + } > > + } > > + > > + return 0; > > +} > > + > > +static int vaapi_vvc_decode_slice(AVCodecContext *avctx, > > + const uint8_t *buffer, > > + uint32_t size) > > +{ > > + const VVCContext *h = avctx->priv_data; > > + VVCFrameContext *fc = &h->fcs[(h->nb_frames > > + h->nb_fcs) % h->nb_fcs]; > > + const SliceContext *sc = fc->slices[fc- > > >nb_slices]; > > + const H266RawPPS *pps = fc->ps.pps->r; > > + const H266RawPictureHeader *ph = fc->ps.ph.r; > > + const H266RawSliceHeader *sh = sc->sh.r; > > + VAAPIDecodePictureVVC *pic = fc->ref- > > >hwaccel_picture_private; > > + VASliceParameterBufferVVC *slice_param = &pic->slice_param; > > + int nb_list, i, err; > > + > > + *slice_param = (VASliceParameterBufferVVC) { > > + .slice_data_size = size, > > + .slice_data_offset = 0, > > + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL, > > + .slice_data_byte_offset = > > get_slice_data_offset(buffer, size, sc), > > + .sh_subpic_id = sh->sh_subpic_id, > > + .sh_slice_address = sh->sh_slice_address, > > + .sh_num_tiles_in_slice_minus1 = sh- > > >sh_num_tiles_in_slice_minus1, > > + .sh_slice_type = sh->sh_slice_type, > > + .sh_num_alf_aps_ids_luma = sh- > > >sh_num_alf_aps_ids_luma, > > + .sh_alf_aps_id_chroma = sh- > > >sh_alf_aps_id_chroma, > > + .sh_alf_cc_cb_aps_id = sh- > > >sh_alf_cc_cb_aps_id, > > + .sh_alf_cc_cr_aps_id = sh- > > >sh_alf_cc_cr_aps_id, > > + .NumRefIdxActive[0] = sh- > > >num_ref_idx_active[0], > > + .NumRefIdxActive[1] = sh- 
> > >num_ref_idx_active[1], > > + .sh_collocated_ref_idx = sh- > > >sh_collocated_ref_idx, > > + .SliceQpY = pps- > > >pps_qp_delta_info_in_ph_flag ? > > + 26 + pps- > > >pps_init_qp_minus26 + ph->ph_qp_delta : > > + 26 + pps- > > >pps_init_qp_minus26 + sh->sh_qp_delta, > > + .sh_cb_qp_offset = sh->sh_cb_qp_offset, > > + .sh_cr_qp_offset = sh->sh_cr_qp_offset, > > + .sh_joint_cbcr_qp_offset = sh- > > >sh_joint_cbcr_qp_offset, > > + .sh_luma_beta_offset_div2 = sh- > > >sh_luma_beta_offset_div2, > > + .sh_luma_tc_offset_div2 = sh- > > >sh_luma_tc_offset_div2, > > + .sh_cb_beta_offset_div2 = sh- > > >sh_cb_beta_offset_div2, > > + .sh_cb_tc_offset_div2 = sh- > > >sh_cb_tc_offset_div2, > > + .sh_cr_beta_offset_div2 = sh- > > >sh_cr_beta_offset_div2, > > + .sh_cr_tc_offset_div2 = sh- > > >sh_cr_tc_offset_div2, > > + .WPInfo = { > > + .luma_log2_weight_denom = sh- > > >sh_pred_weight_table.luma_log2_weight_denom, > > + .delta_chroma_log2_weight_denom = sh- > > >sh_pred_weight_table.delta_chroma_log2_weight_denom, > > + .num_l0_weights = sh- > > >sh_pred_weight_table.num_l0_weights, > > + .num_l1_weights = sh- > > >sh_pred_weight_table.num_l1_weights, > > + }, > > + .sh_flags.bits = { > > + .sh_alf_enabled_flag = sh- > > >sh_alf_enabled_flag, > > + .sh_alf_cb_enabled_flag = sh- > > >sh_alf_cb_enabled_flag, > > + .sh_alf_cr_enabled_flag = sh- > > >sh_alf_cr_enabled_flag, > > + .sh_alf_cc_cb_enabled_flag = sh- > > >sh_alf_cc_cb_enabled_flag, > > + .sh_alf_cc_cr_enabled_flag = sh- > > >sh_alf_cc_cr_enabled_flag, > > + .sh_lmcs_used_flag = sh- > > >sh_lmcs_used_flag, > > + .sh_explicit_scaling_list_used_flag = sh- > > >sh_explicit_scaling_list_used_flag, > > + .sh_cabac_init_flag = sh- > > >sh_cabac_init_flag, > > + .sh_collocated_from_l0_flag = sh- > > >sh_collocated_from_l0_flag, > > + .sh_cu_chroma_qp_offset_enabled_flag = sh- > > >sh_cu_chroma_qp_offset_enabled_flag, > > + .sh_sao_luma_used_flag = sh- > > >sh_sao_luma_used_flag, > > + .sh_sao_chroma_used_flag = sh- > > 
>sh_sao_chroma_used_flag, > > + .sh_deblocking_filter_disabled_flag = sh- > > >sh_deblocking_filter_disabled_flag, > > + .sh_dep_quant_used_flag = sh- > > >sh_dep_quant_used_flag, > > + .sh_sign_data_hiding_used_flag = sh- > > >sh_sign_data_hiding_used_flag, > > + .sh_ts_residual_coding_disabled_flag = sh- > > >sh_ts_residual_coding_disabled_flag, > > + }, > > + }; > > + > > + memset(&slice_param->RefPicList, 0xFF, sizeof(slice_param- > > >RefPicList)); > > + > > + nb_list = (sh->sh_slice_type == VVC_SLICE_TYPE_B) ? > > + 2 : (sh->sh_slice_type == VVC_SLICE_TYPE_I ? 0 : 1); > > + for (int list_idx = 0; list_idx < nb_list; list_idx++) { > > + RefPicList *rpl = &sc->rpl[list_idx]; > > + > > + for (i = 0; i < rpl->nb_refs; i++) > > + slice_param->RefPicList[list_idx][i] = > > get_ref_pic_index(h, rpl->ref[i]); > > + } > > + > > + for (i = 0; i < 7; i++) > > + slice_param->sh_alf_aps_id_luma[i] = sh- > > >sh_alf_aps_id_luma[i]; > > + > > + for (i = 0; i < 15; i++) { > > + slice_param->WPInfo.luma_weight_l0_flag[i] = sh- > > >sh_pred_weight_table.luma_weight_l0_flag[i]; > > + slice_param->WPInfo.chroma_weight_l0_flag[i] = sh- > > >sh_pred_weight_table.chroma_weight_l0_flag[i]; > > + slice_param->WPInfo.delta_luma_weight_l0[i] = sh- > > >sh_pred_weight_table.delta_luma_weight_l0[i]; > > + slice_param->WPInfo.luma_offset_l0[i] = sh- > > >sh_pred_weight_table.luma_offset_l0[i]; > > + slice_param->WPInfo.luma_weight_l1_flag[i] = sh- > > >sh_pred_weight_table.luma_weight_l1_flag[i]; > > + slice_param->WPInfo.chroma_weight_l1_flag[i] = sh- > > >sh_pred_weight_table.chroma_weight_l1_flag[i]; > > + slice_param->WPInfo.delta_luma_weight_l1[i] = sh- > > >sh_pred_weight_table.delta_luma_weight_l1[i]; > > + slice_param->WPInfo.luma_offset_l1[i] = sh- > > >sh_pred_weight_table.luma_offset_l1[i]; > > + } > > + > > + for (i = 0; i < 15; i++) { > > + for (int j = 0; j < 2; j++) { > > + slice_param->WPInfo.delta_chroma_weight_l0[i][j] = sh- > > 
>sh_pred_weight_table.delta_chroma_weight_l0[i][j]; > > + slice_param->WPInfo.delta_chroma_offset_l0[i][j] = sh- > > >sh_pred_weight_table.delta_chroma_offset_l0[i][j]; > > + slice_param->WPInfo.delta_chroma_weight_l1[i][j] = sh- > > >sh_pred_weight_table.delta_chroma_weight_l1[i][j]; > > + slice_param->WPInfo.delta_chroma_offset_l1[i][j] = sh- > > >sh_pred_weight_table.delta_chroma_offset_l1[i][j]; > > + } > > + } > > + > > + err = ff_vaapi_decode_make_slice_buffer(avctx, &pic->pic, > > + &pic->slice_param, > > + > > sizeof(VASliceParameterBufferVVC), > > + buffer, size); > > + if (err) { > > + ff_vaapi_decode_cancel(avctx, &pic->pic); > > + return err; > > + } > > + > > + return 0; > > +} > > + > > +static int vaapi_vvc_end_frame(AVCodecContext *avctx) > > +{ > > + > > + const VVCContext *h = avctx->priv_data; > > + VVCFrameContext *fc = &h->fcs[(h->nb_frames + h- > > >nb_fcs) % h->nb_fcs]; > > + VAAPIDecodePictureVVC *pic = fc->ref->hwaccel_picture_private; > > + int ret; > > + > > + ret = ff_vaapi_decode_issue(avctx, &pic->pic); > > + if (ret < 0) > > + goto fail; > > + > > + pic->decode_issued = 1; > > + > > + return 0; > > + > > +fail: > > + ff_vaapi_decode_cancel(avctx, &pic->pic); > > + return ret; > > +} > > + > > +const FFHWAccel ff_vvc_vaapi_hwaccel = { > > + .p.name = "vvc_vaapi", > > + .p.type = AVMEDIA_TYPE_VIDEO, > > + .p.id = AV_CODEC_ID_VVC, > > + .p.pix_fmt = AV_PIX_FMT_VAAPI, > > + .start_frame = &vaapi_vvc_start_frame, > > + .end_frame = &vaapi_vvc_end_frame, > > + .decode_slice = &vaapi_vvc_decode_slice, > > + .frame_priv_data_size = sizeof(VAAPIDecodePictureVVC), > > + .init = &ff_vaapi_decode_init, > > + .uninit = &ff_vaapi_decode_uninit, > > + .frame_params = &ff_vaapi_common_frame_params, > > + .priv_data_size = sizeof(VAAPIDecodeContext), > > + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, > > +}; > > diff --git a/libavcodec/version.h b/libavcodec/version.h > > index 06631ffa8c..7aa95fc3f1 100644 > > --- a/libavcodec/version.h > > +++ 
b/libavcodec/version.h > > @@ -29,7 +29,7 @@ > > > > #include "version_major.h" > > > > -#define LIBAVCODEC_VERSION_MINOR 4 > > +#define LIBAVCODEC_VERSION_MINOR 5 > > #define LIBAVCODEC_VERSION_MICRO 100 > > > > #define LIBAVCODEC_VERSION_INT > > AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ > > diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c > > index f2e269ce76..b204a0c73a 100644 > > --- a/libavcodec/vvc/vvcdec.c > > +++ b/libavcodec/vvc/vvcdec.c > > @@ -29,6 +29,7 @@ > > #include "libavutil/cpu.h" > > #include "libavutil/thread.h" > > > > +#include "config_components.h" > > #include "vvcdec.h" > > #include "vvc_ctu.h" > > #include "vvc_data.h" > > @@ -724,14 +725,20 @@ static int slice_start(SliceContext *sc, > > VVCContext *s, VVCFrameContext *fc, > > > > static enum AVPixelFormat get_format(AVCodecContext *avctx, const > > VVCSPS *sps) > > { > > -#define HWACCEL_MAX 0 > > +#define HWACCEL_MAX CONFIG_VVC_VAAPI_HWACCEL > > > > enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; > > > > switch (sps->pix_fmt) { > > case AV_PIX_FMT_YUV420P: > > +#if CONFIG_VVC_VAAPI_HWACCEL > > + *fmt++ = AV_PIX_FMT_VAAPI; > > +#endif > > break; > > case AV_PIX_FMT_YUV420P10: > > +#if CONFIG_VVC_VAAPI_HWACCEL > > + *fmt++ = AV_PIX_FMT_VAAPI; > > +#endif > > break; > > } > > > > @@ -1100,4 +1107,10 @@ const FFCodec ff_vvc_decoder = { > > .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING | > > FF_CODEC_CAP_INIT_CLEANUP | > > FF_CODEC_CAP_AUTO_THREADS, > > .p.profiles = NULL_IF_CONFIG_SMALL(ff_vvc_profiles), > > + .hw_configs = (const AVCodecHWConfigInternal *const []) { > > +#if CONFIG_VVC_VAAPI_HWACCEL > > + HWACCEL_VAAPI(vvc), > > +#endif > > + NULL > > + }, > > }; _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". 
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder 2024-04-03 3:31 ` Wang, Fei W @ 2024-04-06 5:03 ` Nuo Mi 0 siblings, 0 replies; 14+ messages in thread From: Nuo Mi @ 2024-04-06 5:03 UTC (permalink / raw) To: Wang, Fei W; +Cc: ffmpeg-devel > > > > --- a/libavcodec/vaapi_decode.c > > > +++ b/libavcodec/vaapi_decode.c > > > @@ -455,6 +455,9 @@ static const struct { > > > MAP(AV1, AV1_MAIN, AV1Profile0), > > > MAP(AV1, AV1_HIGH, AV1Profile1), > > > #endif > > > +#if VA_CHECK_VERSION(1, 22, 0) > > > + MAP(H266, VVC_MAIN_10, VVCMain10), > > > +#endif > > > > > > #undef MAP > > > }; > > > @@ -627,6 +630,10 @@ static int > > > vaapi_decode_make_config(AVCodecContext *avctx, > > > case AV_CODEC_ID_VP8: > > > frames->initial_pool_size += 3; > > > break; > > > + case AV_CODEC_ID_H266: > > > + // Add additional 16 for maximum 16 frames delay in > > > vvc native decode. > > > + frames->initial_pool_size += 32; > > > > One frame of 8k YUV444, 10 bits, is about 200MB. Thirty-two frames > > amount to approximately 6GB. Can we dynamically allocate the buffer > > pool? > > It's processing in other thread: > https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=11316 > > > > > The software decoder requires a delay of 16 frames to ensure full > > utilization of CPUs. In the future, we may consider increasing this > > to 32 or even 64 frames. > > However, for hardware decoding, given that all processing occurs on > > the GPU, we do not require any delay. > > The delay can avoid sync hardware task immediately once it is > submitted, which can avoid hardware switch tasks frequently and drop > performance. If the number will increase, I'd prefer to set it as an > option and diff the default value for hardware with software. Why does VVC require such a large frame pool while other hardware codecs do not? What makes VVC so special?
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com ` (5 preceding siblings ...) 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com @ 2024-04-01 19:52 ` Mark Thompson 2024-04-02 6:16 ` Wang, Fei W 6 siblings, 1 reply; 14+ messages in thread From: Mark Thompson @ 2024-04-01 19:52 UTC (permalink / raw) To: ffmpeg-devel On 28/03/2024 01:26, fei.w.wang-at-intel.com@ffmpeg.org wrote: > From: Fei Wang <fei.w.wang@intel.com> > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > --- > libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++------- > libavcodec/vaapi_decode.h | 7 ++----- > 2 files changed, 24 insertions(+), 12 deletions(-) This is because the VVC code is going to want to make a lot more of these param buffers - can we just set a slightly larger fixed limit? If you always need 20 buffers (say), then this has turned 1 allocation per picture into 3 and used more memory in the non-VVC case as well because of the overhead of that (but if you might variably need up to 200 then this is completely fair). > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c > index cca94b5336..1b1972a2a9 100644 > --- a/libavcodec/vaapi_decode.c > +++ b/libavcodec/vaapi_decode.c > @@ -38,12 +38,23 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, > { > VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data; > VAStatus vas; > - VABufferID buffer; > > - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS); > + av_assert0(pic->nb_param_buffers <= pic->param_allocated); > + if (pic->nb_param_buffers == pic->param_allocated) { > + pic->param_buffers = > + av_realloc_array(pic->param_buffers, > + pic->param_allocated + 16, > + sizeof(*pic->param_buffers)); Use av_reallocp_array() to avoid leaking the pointer on failure. 
> + if (!pic->param_buffers) > + return AVERROR(ENOMEM); > + > + pic->param_allocated += 16; > + } > + av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated); > > vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, > - type, size, 1, (void*)data, &buffer); > + type, size, 1, (void*)data, > + &pic->param_buffers[pic->nb_param_buffers]); > if (vas != VA_STATUS_SUCCESS) { > av_log(avctx, AV_LOG_ERROR, "Failed to create parameter " > "buffer (type %d): %d (%s).\n", > @@ -51,14 +62,14 @@ int ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, > return AVERROR(EIO); > } > > - pic->param_buffers[pic->nb_param_buffers++] = buffer; > - > av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu bytes) " > - "is %#x.\n", type, size, buffer); > + "is %#x.\n", type, size, pic->param_buffers[pic->nb_param_buffers]); > + > + ++pic->nb_param_buffers; > + > return 0; > } > > - > int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx, > VAAPIDecodePicture *pic, > const void *params_data, > @@ -215,6 +226,8 @@ fail: > fail_at_end: > exit: > pic->nb_param_buffers = 0; > + pic->param_allocated = 0; > + av_freep(&pic->param_buffers); > pic->nb_slices = 0; > pic->slices_allocated = 0; > av_freep(&pic->slice_buffers); > @@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext *avctx, > ff_vaapi_decode_destroy_buffers(avctx, pic); > > pic->nb_param_buffers = 0; > + pic->param_allocated = 0; > + av_freep(&pic->param_buffers); > pic->nb_slices = 0; > pic->slices_allocated = 0; > av_freep(&pic->slice_buffers); > diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h > index 6beda14e52..a41d7ff2ff 100644 > --- a/libavcodec/vaapi_decode.h > +++ b/libavcodec/vaapi_decode.h > @@ -32,15 +32,12 @@ static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic) > return (uintptr_t)pic->data[3]; > } > > -enum { > - MAX_PARAM_BUFFERS = 16, > -}; > - > typedef struct VAAPIDecodePicture { > VASurfaceID output_surface; > > int nb_param_buffers; > - VABufferID 
param_buffers[MAX_PARAM_BUFFERS]; > + VABufferID *param_buffers; Previously the array was zeroed at allocation but now it isn't. Can you confirm that that isn't a problem? > + int param_allocated; Maybe "nb_param_buffers_allocated" would be clearer. > > int nb_slices; > VABufferID *slice_buffers; Thanks, - Mark _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically 2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson @ 2024-04-02 6:16 ` Wang, Fei W 0 siblings, 0 replies; 14+ messages in thread From: Wang, Fei W @ 2024-04-02 6:16 UTC (permalink / raw) To: ffmpeg-devel On Mon, 2024-04-01 at 20:52 +0100, Mark Thompson wrote: > On 28/03/2024 01:26, fei.w.wang-at-intel.com@ffmpeg.org wrote: > > From: Fei Wang <fei.w.wang@intel.com> > > > > Signed-off-by: Fei Wang <fei.w.wang@intel.com> > > --- > > libavcodec/vaapi_decode.c | 29 ++++++++++++++++++++++------- > > libavcodec/vaapi_decode.h | 7 ++----- > > 2 files changed, 24 insertions(+), 12 deletions(-) > > This is because the VVC code is going to want to make a lot more of > these param buffers - can we just set a slightly larger fixed limit? > > If you always need 20 buffers (say), then this has turned 1 > allocation per picture into 3 and used more memory in the non-VVC > case as well because of the overhead of that (but if you might > variably need up to 200 then this is completely fair). VVC support 1000 slices and 990 tile columns, and other buffer like APS/tile_rows... So there will be 1990+ buffers needed at most in theory. For other non-VVC case, 16 buffers will be created which is same with before. 
> > > diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c > > index cca94b5336..1b1972a2a9 100644 > > --- a/libavcodec/vaapi_decode.c > > +++ b/libavcodec/vaapi_decode.c > > @@ -38,12 +38,23 @@ int > > ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, > > { > > VAAPIDecodeContext *ctx = avctx->internal->hwaccel_priv_data; > > VAStatus vas; > > - VABufferID buffer; > > > > - av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS); > > + av_assert0(pic->nb_param_buffers <= pic->param_allocated); > > + if (pic->nb_param_buffers == pic->param_allocated) { > > + pic->param_buffers = > > + av_realloc_array(pic->param_buffers, > > + pic->param_allocated + 16, > > + sizeof(*pic->param_buffers)); > > Use av_reallocp_array() to avoid leaking the pointer on failure. > > > + if (!pic->param_buffers) > > + return AVERROR(ENOMEM); > > + > > + pic->param_allocated += 16; > > + } > > + av_assert0(pic->nb_param_buffers + 1 <= pic->param_allocated); > > > > vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context, > > - type, size, 1, (void*)data, &buffer); > > + type, size, 1, (void*)data, > > + &pic->param_buffers[pic- > > >nb_param_buffers]); > > if (vas != VA_STATUS_SUCCESS) { > > av_log(avctx, AV_LOG_ERROR, "Failed to create parameter " > > "buffer (type %d): %d (%s).\n", > > @@ -51,14 +62,14 @@ int > > ff_vaapi_decode_make_param_buffer(AVCodecContext *avctx, > > return AVERROR(EIO); > > } > > > > - pic->param_buffers[pic->nb_param_buffers++] = buffer; > > - > > av_log(avctx, AV_LOG_DEBUG, "Param buffer (type %d, %zu > > bytes) " > > - "is %#x.\n", type, size, buffer); > > + "is %#x.\n", type, size, pic->param_buffers[pic- > > >nb_param_buffers]); > > + > > + ++pic->nb_param_buffers; > > + > > return 0; > > } > > > > - > > int ff_vaapi_decode_make_slice_buffer(AVCodecContext *avctx, > > VAAPIDecodePicture *pic, > > const void *params_data, > > @@ -215,6 +226,8 @@ fail: > > fail_at_end: > > exit: > > pic->nb_param_buffers = 0; > > + pic->param_allocated 
= 0; > > + av_freep(&pic->param_buffers); > > pic->nb_slices = 0; > > pic->slices_allocated = 0; > > av_freep(&pic->slice_buffers); > > @@ -228,6 +241,8 @@ int ff_vaapi_decode_cancel(AVCodecContext > > *avctx, > > ff_vaapi_decode_destroy_buffers(avctx, pic); > > > > pic->nb_param_buffers = 0; > > + pic->param_allocated = 0; > > + av_freep(&pic->param_buffers); > > pic->nb_slices = 0; > > pic->slices_allocated = 0; > > av_freep(&pic->slice_buffers); > > diff --git a/libavcodec/vaapi_decode.h b/libavcodec/vaapi_decode.h > > index 6beda14e52..a41d7ff2ff 100644 > > --- a/libavcodec/vaapi_decode.h > > +++ b/libavcodec/vaapi_decode.h > > @@ -32,15 +32,12 @@ static inline VASurfaceID > > ff_vaapi_get_surface_id(AVFrame *pic) > > return (uintptr_t)pic->data[3]; > > } > > > > -enum { > > - MAX_PARAM_BUFFERS = 16, > > -}; > > - > > typedef struct VAAPIDecodePicture { > > VASurfaceID output_surface; > > > > int nb_param_buffers; > > - VABufferID param_buffers[MAX_PARAM_BUFFERS]; > > + VABufferID *param_buffers; > > Previously the array was zeroed at allocation but now it isn't. Can > you confirm that that isn't a problem? It should not be a problem: all buffers are visited and destroyed through nb_param_buffers, which records whether a buffer is valid. Thanks Fei > > > + int param_allocated; > > Maybe "nb_param_buffers_allocated" would be clearer. > > > > > int nb_slices; > > VABufferID *slice_buffers; > > Thanks, > > - Mark > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". 
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2024-04-06 5:04 UTC | newest] Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-03-28 1:26 [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 2/7] lavc/vvc_refs: Move definition of VVC_FRAME_FLAG* to h header fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 3/7] lavc/cbs_h266: Add SliceTopLeftTileIdx to H266RawPPS fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 4/7] lavc/cbs_h266: Add NumSlicesInTile " fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 5/7] lavc/vvc_ps: Add alf raw syntax into VVCALF fei.w.wang-at-intel.com 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 6/7] lavc/vvc_dec: Add hardware decode API fei.w.wang-at-intel.com 2024-03-28 2:04 ` Andreas Rheinhardt 2024-04-02 6:24 ` Wang, Fei W 2024-03-28 1:26 ` [FFmpeg-devel] [PATCH v1 7/7] lavc/vaapi_dec: Add VVC decoder fei.w.wang-at-intel.com 2024-04-02 12:48 ` Nuo Mi 2024-04-03 3:31 ` Wang, Fei W 2024-04-06 5:03 ` Nuo Mi 2024-04-01 19:52 ` [FFmpeg-devel] [PATCH v1 1/7] lavc/vaapi_dec: Create VA parameters dynamically Mark Thompson 2024-04-02 6:16 ` Wang, Fei W
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git